summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.h1
-rw-r--r--fs/9p/v9fs_vfs.h4
-rw-r--r--fs/9p/vfs_addr.c87
-rw-r--r--fs/9p/vfs_dir.c15
-rw-r--r--fs/9p/vfs_file.c326
-rw-r--r--fs/9p/xattr.c80
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Kconfig.binfmt3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/dir_fplus.c1
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/adfs/super.c20
-rw-r--r--fs/affs/affs.h28
-rw-r--r--fs/affs/amigaffs.c3
-rw-r--r--fs/affs/file.c34
-rw-r--r--fs/affs/inode.c32
-rw-r--r--fs/affs/namei.c6
-rw-r--r--fs/affs/super.c43
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/misc.c16
-rw-r--r--fs/afs/rxrpc.c5
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c278
-rw-r--r--fs/autofs4/autofs_i.h2
-rw-r--r--fs/autofs4/waitq.c2
-rw-r--r--fs/befs/befs.h22
-rw-r--r--fs/befs/datastream.c4
-rw-r--r--fs/befs/io.c2
-rw-r--r--fs/befs/linuxvfs.c16
-rw-r--r--fs/befs/super.c4
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c1
-rw-r--r--fs/binfmt_elf.c31
-rw-r--r--fs/binfmt_misc.c30
-rw-r--r--fs/block_dev.c24
-rw-r--r--fs/btrfs/ctree.c8
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c51
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/file.c117
-rw-r--r--fs/btrfs/inode.c137
-rw-r--r--fs/btrfs/ordered-data.c7
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/send.c171
-rw-r--r--fs/btrfs/tests/inode-tests.c197
-rw-r--r--fs/btrfs/transaction.c42
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c9
-rw-r--r--fs/btrfs/xattr.c8
-rw-r--r--fs/buffer.c4
-rw-r--r--fs/ceph/addr.c41
-rw-r--r--fs/ceph/caps.c51
-rw-r--r--fs/ceph/dir.c48
-rw-r--r--fs/ceph/file.c27
-rw-r--r--fs/ceph/mds_client.c61
-rw-r--r--fs/ceph/strings.c1
-rw-r--r--fs/ceph/super.c56
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/ceph/xattr.c23
-rw-r--r--fs/cifs/cifsencrypt.c6
-rw-r--r--fs/cifs/cifsfs.c12
-rw-r--r--fs/cifs/connect.c19
-rw-r--r--fs/cifs/file.c94
-rw-r--r--fs/cifs/inode.c2
-rw-r--r--fs/cifs/smb2misc.c2
-rw-r--r--fs/cifs/smb2ops.c3
-rw-r--r--fs/cifs/smb2pdu.c17
-rw-r--r--fs/coda/file.c38
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dax.c44
-rw-r--r--fs/dcache.c49
-rw-r--r--fs/debugfs/inode.c5
-rw-r--r--fs/direct-io.c44
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h4
-rw-r--r--fs/ecryptfs/file.c43
-rw-r--r--fs/ecryptfs/keystore.c2
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/exec.c88
-rw-r--r--fs/exofs/file.c2
-rw-r--r--fs/exofs/inode.c4
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/file.c21
-rw-r--r--fs/ext2/inode.c18
-rw-r--r--fs/ext2/namei.c10
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/inode.c16
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext3/xattr.c3
-rw-r--r--fs/ext4/Kconfig17
-rw-r--r--fs/ext4/Makefile4
-rw-r--r--fs/ext4/acl.c5
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/bitmap.c1
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/crypto.c558
-rw-r--r--fs/ext4/crypto_fname.c709
-rw-r--r--fs/ext4/crypto_key.c165
-rw-r--r--fs/ext4/crypto_policy.c194
-rw-r--r--fs/ext4/dir.c81
-rw-r--r--fs/ext4/ext4.h174
-rw-r--r--fs/ext4/ext4_crypto.h147
-rw-r--r--fs/ext4/extents.c81
-rw-r--r--fs/ext4/extents_status.c2
-rw-r--r--fs/ext4/file.c77
-rw-r--r--fs/ext4/fsync.c1
-rw-r--r--fs/ext4/hash.c1
-rw-r--r--fs/ext4/ialloc.c28
-rw-r--r--fs/ext4/indirect.c27
-rw-r--r--fs/ext4/inline.c16
-rw-r--r--fs/ext4/inode.c164
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/ext4/namei.c647
-rw-r--r--fs/ext4/page-io.c48
-rw-r--r--fs/ext4/readpage.c328
-rw-r--r--fs/ext4/super.c58
-rw-r--r--fs/ext4/symlink.c97
-rw-r--r--fs/ext4/xattr.c4
-rw-r--r--fs/ext4/xattr.h3
-rw-r--r--fs/f2fs/Kconfig2
-rw-r--r--fs/f2fs/acl.c14
-rw-r--r--fs/f2fs/checkpoint.c38
-rw-r--r--fs/f2fs/data.c766
-rw-r--r--fs/f2fs/debug.c22
-rw-r--r--fs/f2fs/dir.c93
-rw-r--r--fs/f2fs/f2fs.h174
-rw-r--r--fs/f2fs/file.c66
-rw-r--r--fs/f2fs/gc.c6
-rw-r--r--fs/f2fs/inline.c69
-rw-r--r--fs/f2fs/inode.c25
-rw-r--r--fs/f2fs/namei.c81
-rw-r--r--fs/f2fs/node.c18
-rw-r--r--fs/f2fs/node.h1
-rw-r--r--fs/f2fs/recovery.c76
-rw-r--r--fs/f2fs/segment.c17
-rw-r--r--fs/f2fs/segment.h3
-rw-r--r--fs/f2fs/super.c40
-rw-r--r--fs/f2fs/xattr.c4
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/fat.h5
-rw-r--r--fs/fat/fatent.c3
-rw-r--r--fs/fat/file.c6
-rw-r--r--fs/fat/inode.c23
-rw-r--r--fs/fat/misc.c4
-rw-r--r--fs/fat/namei_msdos.c2
-rw-r--r--fs/fat/namei_vfat.c2
-rw-r--r--fs/file.c3
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fs-writeback.c93
-rw-r--r--fs/fs_pin.c4
-rw-r--r--fs/fuse/cuse.c27
-rw-r--r--fs/fuse/dev.c83
-rw-r--r--fs/fuse/file.c151
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/gfs2/acl.c6
-rw-r--r--fs/gfs2/aops.c24
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c108
-rw-r--r--fs/gfs2/glock.c47
-rw-r--r--fs/gfs2/incore.h4
-rw-r--r--fs/gfs2/inode.c18
-rw-r--r--fs/gfs2/quota.c90
-rw-r--r--fs/gfs2/quota.h8
-rw-r--r--fs/gfs2/rgrp.c20
-rw-r--r--fs/gfs2/rgrp.h3
-rw-r--r--fs/gfs2/xattr.c2
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/inode.c12
-rw-r--r--fs/hfsplus/bfind.c4
-rw-r--r--fs/hfsplus/brec.c20
-rw-r--r--fs/hfsplus/catalog.c3
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/inode.c19
-rw-r--r--fs/hfsplus/ioctl.c12
-rw-r--r--fs/hfsplus/xattr.c86
-rw-r--r--fs/hfsplus/xattr.h22
-rw-r--r--fs/hfsplus/xattr_security.c38
-rw-r--r--fs/hfsplus/xattr_trusted.c37
-rw-r--r--fs/hfsplus/xattr_user.c35
-rw-r--r--fs/hostfs/hostfs.h6
-rw-r--r--fs/hostfs/hostfs_kern.c114
-rw-r--r--fs/hostfs/hostfs_user.c29
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hugetlbfs/inode.c183
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/inode.c10
-rw-r--r--fs/jfs/jfs_metapage.c31
-rw-r--r--fs/jfs/jfs_metapage.h1
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/kernfs/file.c1
-rw-r--r--fs/locks.c104
-rw-r--r--fs/logfs/file.c2
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/namei.c176
-rw-r--r--fs/namespace.c142
-rw-r--r--fs/ncpfs/file.c88
-rw-r--r--fs/ncpfs/ncplib_kernel.c6
-rw-r--r--fs/ncpfs/ncplib_kernel.h2
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/delegation.c45
-rw-r--r--fs/nfs/dir.c22
-rw-r--r--fs/nfs/direct.c40
-rw-r--r--fs/nfs/file.c29
-rw-r--r--fs/nfs/inode.c111
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs3xdr.c5
-rw-r--r--fs/nfs/nfs4client.c9
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c31
-rw-r--r--fs/nfs/nfs4session.h1
-rw-r--r--fs/nfs/nfs4state.c18
-rw-r--r--fs/nfs/proc.c6
-rw-r--r--fs/nfs/read.c8
-rw-r--r--fs/nfs/write.c35
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nfsd/blocklayout.c2
-rw-r--r--fs/nfsd/blocklayoutxdr.c6
-rw-r--r--fs/nfsd/nfs4layouts.c12
-rw-r--r--fs/nfsd/nfs4proc.c2
-rw-r--r--fs/nfsd/nfs4state.c24
-rw-r--r--fs/nfsd/nfs4xdr.c20
-rw-r--r--fs/nfsd/nfscache.c6
-rw-r--r--fs/nilfs2/alloc.c5
-rw-r--r--fs/nilfs2/bmap.c48
-rw-r--r--fs/nilfs2/bmap.h13
-rw-r--r--fs/nilfs2/btree.c110
-rw-r--r--fs/nilfs2/cpfile.c58
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/file.c2
-rw-r--r--fs/nilfs2/inode.c37
-rw-r--r--fs/nilfs2/mdt.c54
-rw-r--r--fs/nilfs2/mdt.h10
-rw-r--r--fs/nilfs2/page.c24
-rw-r--r--fs/nilfs2/segment.c24
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/notify/fanotify/fanotify.c3
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/file.c778
-rw-r--r--fs/ntfs/inode.c1
-rw-r--r--fs/ocfs2/alloc.c48
-rw-r--r--fs/ocfs2/aops.c178
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/cluster/masklog.h5
-rw-r--r--fs/ocfs2/dir.c15
-rw-r--r--fs/ocfs2/dlmglue.c5
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/file.c147
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/ocfs2_fs.h15
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/stack_o2cb.c2
-rw-r--r--fs/ocfs2/stack_user.c8
-rw-r--r--fs/ocfs2/suballoc.c2
-rw-r--r--fs/ocfs2/super.c37
-rw-r--r--fs/ocfs2/xattr.c8
-rw-r--r--fs/omfs/file.c2
-rw-r--r--fs/open.c9
-rw-r--r--fs/overlayfs/super.c33
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/pnode.c60
-rw-r--r--fs/pnode.h7
-rw-r--r--fs/proc/array.c26
-rw-r--r--fs/proc/base.c82
-rw-r--r--fs/proc/fd.c27
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/pstore/inode.c3
-rw-r--r--fs/pstore/ram.c3
-rw-r--r--fs/quota/dquot.c151
-rw-r--r--fs/quota/quota.c217
-rw-r--r--fs/quota/quota_tree.c7
-rw-r--r--fs/quota/quota_v2.c12
-rw-r--r--fs/quota/quotaio_v2.h6
-rw-r--r--fs/ramfs/file-mmu.c2
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/read_write.c213
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/inode.c10
-rw-r--r--fs/reiserfs/reiserfs.h1
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/romfs/mmap-nommu.c1
-rw-r--r--fs/splice.c31
-rw-r--r--fs/stat.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/group.c11
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/tracefs/Makefile4
-rw-r--r--fs/tracefs/inode.c650
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/commit.c12
-rw-r--r--fs/ubifs/compress.c22
-rw-r--r--fs/ubifs/debug.c186
-rw-r--r--fs/ubifs/dir.c23
-rw-r--r--fs/ubifs/file.c20
-rw-r--r--fs/ubifs/io.c40
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/journal.c17
-rw-r--r--fs/ubifs/log.c4
-rw-r--r--fs/ubifs/lprops.c62
-rw-r--r--fs/ubifs/lpt.c59
-rw-r--r--fs/ubifs/lpt_commit.c34
-rw-r--r--fs/ubifs/master.c6
-rw-r--r--fs/ubifs/orphan.c26
-rw-r--r--fs/ubifs/recovery.c44
-rw-r--r--fs/ubifs/replay.c34
-rw-r--r--fs/ubifs/sb.c30
-rw-r--r--fs/ubifs/scan.c24
-rw-r--r--fs/ubifs/super.c107
-rw-r--r--fs/ubifs/tnc.c20
-rw-r--r--fs/ubifs/tnc_commit.c12
-rw-r--r--fs/ubifs/tnc_misc.c24
-rw-r--r--fs/ubifs/ubifs.h40
-rw-r--r--fs/ubifs/xattr.c18
-rw-r--r--fs/udf/balloc.c20
-rw-r--r--fs/udf/dir.c1
-rw-r--r--fs/udf/directory.c1
-rw-r--r--fs/udf/file.c30
-rw-r--r--fs/udf/inode.c12
-rw-r--r--fs/udf/misc.c1
-rw-r--r--fs/udf/namei.c10
-rw-r--r--fs/udf/partition.c1
-rw-r--r--fs/udf/super.c1
-rw-r--r--fs/udf/symlink.c1
-rw-r--r--fs/udf/truncate.c1
-rw-r--r--fs/ufs/file.c2
-rw-r--r--fs/xfs/xfs_aops.c13
-rw-r--r--fs/xfs/xfs_file.c54
-rw-r--r--fs/xfs/xfs_inode.c4
-rw-r--r--fs/xfs/xfs_inode.h9
-rw-r--r--fs/xfs/xfs_iops.c36
-rw-r--r--fs/xfs/xfs_pnfs.c4
-rw-r--r--fs/xfs/xfs_qm.c5
-rw-r--r--fs/xfs/xfs_qm.h4
-rw-r--r--fs/xfs/xfs_qm_syscalls.c176
-rw-r--r--fs/xfs/xfs_quotaops.c117
346 files changed, 10350 insertions, 4702 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 099c7712631c..fb9ffcb43277 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -78,7 +78,6 @@ enum p9_cache_modes {
* @cache: cache mode of type &p9_cache_modes
* @cachetag: the tag of the cache associated with this session
* @fscache: session cookie associated with FS-Cache
- * @options: copy of options string given by user
* @uname: string user name to mount hierarchy as
* @aname: mount specifier for remote hierarchy
* @maxdata: maximum data to be sent/recvd per protocol message
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b83ebfbf3fdc..5a0db6dec8d1 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -68,14 +68,10 @@ int v9fs_file_open(struct inode *inode, struct file *file);
void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
int v9fs_uflags2omode(int uflags, int extended);
-ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
-ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
int datasync);
-ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
- const char __user *, size_t, loff_t *, int);
int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
static inline void v9fs_invalidate_inode_attr(struct inode *inode)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e055ea83..e9e04376c52c 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
#include <linux/pagemap.h>
#include <linux/idr.h>
#include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -51,12 +51,11 @@
*/
static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
{
- int retval;
- loff_t offset;
- char *buffer;
- struct inode *inode;
+ struct inode *inode = page->mapping->host;
+ struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE};
+ struct iov_iter to;
+ int retval, err;
- inode = page->mapping->host;
p9_debug(P9_DEBUG_VFS, "\n");
BUG_ON(!PageLocked(page));
@@ -65,16 +64,16 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
if (retval == 0)
return retval;
- buffer = kmap(page);
- offset = page_offset(page);
+ iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
- retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
- if (retval < 0) {
+ retval = p9_client_read(fid, page_offset(page), &to, &err);
+ if (err) {
v9fs_uncache_page(inode, page);
+ retval = err;
goto done;
}
- memset(buffer + retval, 0, PAGE_CACHE_SIZE - retval);
+ zero_user(page, retval, PAGE_SIZE - retval);
flush_dcache_page(page);
SetPageUptodate(page);
@@ -82,7 +81,6 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
retval = 0;
done:
- kunmap(page);
unlock_page(page);
return retval;
}
@@ -161,41 +159,32 @@ static void v9fs_invalidate_page(struct page *page, unsigned int offset,
static int v9fs_vfs_writepage_locked(struct page *page)
{
- char *buffer;
- int retval, len;
- loff_t offset, size;
- mm_segment_t old_fs;
- struct v9fs_inode *v9inode;
struct inode *inode = page->mapping->host;
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ loff_t size = i_size_read(inode);
+ struct iov_iter from;
+ struct bio_vec bvec;
+ int err, len;
- v9inode = V9FS_I(inode);
- size = i_size_read(inode);
if (page->index == size >> PAGE_CACHE_SHIFT)
len = size & ~PAGE_CACHE_MASK;
else
len = PAGE_CACHE_SIZE;
- set_page_writeback(page);
-
- buffer = kmap(page);
- offset = page_offset(page);
+ bvec.bv_page = page;
+ bvec.bv_offset = 0;
+ bvec.bv_len = len;
+ iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
- old_fs = get_fs();
- set_fs(get_ds());
/* We should have writeback_fid always set */
BUG_ON(!v9inode->writeback_fid);
- retval = v9fs_file_write_internal(inode,
- v9inode->writeback_fid,
- (__force const char __user *)buffer,
- len, &offset, 0);
- if (retval > 0)
- retval = 0;
+ set_page_writeback(page);
+
+ p9_client_write(v9inode->writeback_fid, page_offset(page), &from, &err);
- set_fs(old_fs);
- kunmap(page);
end_page_writeback(page);
- return retval;
+ return err;
}
static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -241,11 +230,8 @@ static int v9fs_launder_page(struct page *page)
/**
* v9fs_direct_IO - 9P address space operation for direct I/O
- * @rw: direction (read or write)
* @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
* @pos: offset in file to begin the operation
- * @nr_segs: size of iovec array
*
* The presence of v9fs_direct_IO() in the address space ops vector
* allowes open() O_DIRECT flags which would have failed otherwise.
@@ -259,18 +245,23 @@ static int v9fs_launder_page(struct page *page)
*
*/
static ssize_t
-v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
+v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
- /*
- * FIXME
- * Now that we do caching with cache mode enabled, We need
- * to support direct IO
- */
- p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%pD) off/no(%lld/%lu) EINVAL\n",
- iocb->ki_filp,
- (long long)pos, iter->nr_segs);
-
- return -EINVAL;
+ struct file *file = iocb->ki_filp;
+ ssize_t n;
+ int err = 0;
+ if (iov_iter_rw(iter) == WRITE) {
+ n = p9_client_write(file->private_data, pos, iter, &err);
+ if (n) {
+ struct inode *inode = file_inode(file);
+ loff_t i_size = i_size_read(inode);
+ if (pos + n > i_size)
+ inode_add_bytes(inode, pos + n - i_size);
+ }
+ } else {
+ n = p9_client_read(file->private_data, pos, iter, &err);
+ }
+ return n ? n : err;
}
static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 4f1151088ebe..76c3b1ab6361 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -33,6 +33,7 @@
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/slab.h>
+#include <linux/uio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -115,6 +116,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
int buflen;
int reclen = 0;
struct p9_rdir *rdir;
+ struct kvec kvec;
p9_debug(P9_DEBUG_VFS, "name %pD\n", file);
fid = file->private_data;
@@ -124,16 +126,21 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
rdir = v9fs_alloc_rdir_buf(file, buflen);
if (!rdir)
return -ENOMEM;
+ kvec.iov_base = rdir->buf;
+ kvec.iov_len = buflen;
while (1) {
if (rdir->tail == rdir->head) {
- err = v9fs_file_readn(file, rdir->buf, NULL,
- buflen, ctx->pos);
- if (err <= 0)
+ struct iov_iter to;
+ int n;
+ iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
+ n = p9_client_read(file->private_data, ctx->pos, &to,
+ &err);
+ if (err)
return err;
rdir->head = 0;
- rdir->tail = err;
+ rdir->tail = n;
}
while (rdir->head < rdir->tail) {
p9stat_init(&st);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b40133796b87..1ef16bd8280b 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -36,6 +36,8 @@
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <linux/idr.h>
+#include <linux/uio.h>
+#include <linux/slab.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -149,7 +151,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct p9_flock flock;
struct p9_fid *fid;
- uint8_t status;
+ uint8_t status = P9_LOCK_ERROR;
int res = 0;
unsigned char fl_type;
@@ -194,7 +196,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
for (;;) {
res = p9_client_lock_dotl(fid, &flock, &status);
if (res < 0)
- break;
+ goto out_unlock;
if (status != P9_LOCK_BLOCKED)
break;
@@ -212,14 +214,16 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
case P9_LOCK_BLOCKED:
res = -EAGAIN;
break;
+ default:
+ WARN_ONCE(1, "unknown lock status code: %d\n", status);
+ /* fallthough */
case P9_LOCK_ERROR:
case P9_LOCK_GRACE:
res = -ENOLCK;
break;
- default:
- BUG();
}
+out_unlock:
/*
* incase server returned error for lock request, revert
* it locally
@@ -285,6 +289,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
fl->fl_end = glock.start + glock.length - 1;
fl->fl_pid = glock.proc_id;
}
+ kfree(glock.client_id);
return res;
}
@@ -364,63 +369,6 @@ out_err:
}
/**
- * v9fs_fid_readn - read from a fid
- * @fid: fid to read
- * @data: data buffer to read data into
- * @udata: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-ssize_t
-v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
- u64 offset)
-{
- int n, total, size;
-
- p9_debug(P9_DEBUG_VFS, "fid %d offset %llu count %d\n",
- fid->fid, (long long unsigned)offset, count);
- n = 0;
- total = 0;
- size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
- do {
- n = p9_client_read(fid, data, udata, offset, count);
- if (n <= 0)
- break;
-
- if (data)
- data += n;
- if (udata)
- udata += n;
-
- offset += n;
- count -= n;
- total += n;
- } while (count > 0 && n == size);
-
- if (n < 0)
- total = n;
-
- return total;
-}
-
-/**
- * v9fs_file_readn - read from a file
- * @filp: file pointer to read
- * @data: data buffer to read data into
- * @udata: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-ssize_t
-v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
- u64 offset)
-{
- return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
-}
-
-/**
* v9fs_file_read - read from a file
* @filp: file pointer to read
* @udata: user data buffer to read data into
@@ -430,69 +378,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
*/
static ssize_t
-v9fs_file_read(struct file *filp, char __user *udata, size_t count,
- loff_t * offset)
+v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- int ret;
- struct p9_fid *fid;
- size_t size;
-
- p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
- fid = filp->private_data;
+ struct p9_fid *fid = iocb->ki_filp->private_data;
+ int ret, err;
- size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
- if (count > size)
- ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
- else
- ret = p9_client_read(fid, NULL, udata, *offset, count);
+ p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
+ iov_iter_count(to), iocb->ki_pos);
- if (ret > 0)
- *offset += ret;
+ ret = p9_client_read(fid, iocb->ki_pos, to, &err);
+ if (!ret)
+ return err;
+ iocb->ki_pos += ret;
return ret;
}
-ssize_t
-v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
- const char __user *data, size_t count,
- loff_t *offset, int invalidate)
-{
- int n;
- loff_t i_size;
- size_t total = 0;
- loff_t origin = *offset;
- unsigned long pg_start, pg_end;
-
- p9_debug(P9_DEBUG_VFS, "data %p count %d offset %x\n",
- data, (int)count, (int)*offset);
-
- do {
- n = p9_client_write(fid, NULL, data+total, origin+total, count);
- if (n <= 0)
- break;
- count -= n;
- total += n;
- } while (count > 0);
-
- if (invalidate && (total > 0)) {
- pg_start = origin >> PAGE_CACHE_SHIFT;
- pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
- if (inode->i_mapping && inode->i_mapping->nrpages)
- invalidate_inode_pages2_range(inode->i_mapping,
- pg_start, pg_end);
- *offset += total;
- i_size = i_size_read(inode);
- if (*offset > i_size) {
- inode_add_bytes(inode, *offset - i_size);
- i_size_write(inode, *offset);
- }
- }
- if (n < 0)
- return n;
-
- return total;
-}
-
/**
* v9fs_file_write - write to a file
* @filp: file pointer to write
@@ -502,35 +403,39 @@ v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
*
*/
static ssize_t
-v9fs_file_write(struct file *filp, const char __user * data,
- size_t count, loff_t *offset)
+v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- ssize_t retval = 0;
- loff_t origin = *offset;
-
-
- retval = generic_write_checks(filp, &origin, &count, 0);
- if (retval)
- goto out;
+ struct file *file = iocb->ki_filp;
+ ssize_t retval;
+ loff_t origin;
+ int err = 0;
- retval = -EINVAL;
- if ((ssize_t) count < 0)
- goto out;
- retval = 0;
- if (!count)
- goto out;
+ retval = generic_write_checks(iocb, from);
+ if (retval <= 0)
+ return retval;
- retval = v9fs_file_write_internal(file_inode(filp),
- filp->private_data,
- data, count, &origin, 1);
- /* update offset on successful write */
- if (retval > 0)
- *offset = origin;
-out:
- return retval;
+ origin = iocb->ki_pos;
+ retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err);
+ if (retval > 0) {
+ struct inode *inode = file_inode(file);
+ loff_t i_size;
+ unsigned long pg_start, pg_end;
+ pg_start = origin >> PAGE_CACHE_SHIFT;
+ pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT;
+ if (inode->i_mapping && inode->i_mapping->nrpages)
+ invalidate_inode_pages2_range(inode->i_mapping,
+ pg_start, pg_end);
+ iocb->ki_pos += retval;
+ i_size = i_size_read(inode);
+ if (iocb->ki_pos > i_size) {
+ inode_add_bytes(inode, iocb->ki_pos - i_size);
+ i_size_write(inode, iocb->ki_pos);
+ }
+ return retval;
+ }
+ return err;
}
-
static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
int datasync)
{
@@ -657,44 +562,6 @@ out_unlock:
return VM_FAULT_NOPAGE;
}
-static ssize_t
-v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
- loff_t *offsetp)
-{
- loff_t size, offset;
- struct inode *inode;
- struct address_space *mapping;
-
- offset = *offsetp;
- mapping = filp->f_mapping;
- inode = mapping->host;
- if (!count)
- return 0;
- size = i_size_read(inode);
- if (offset < size)
- filemap_write_and_wait_range(mapping, offset,
- offset + count - 1);
-
- return v9fs_file_read(filp, udata, count, offsetp);
-}
-
-/**
- * v9fs_cached_file_read - read from a file
- * @filp: file pointer to read
- * @data: user data buffer to read data into
- * @count: size of buffer
- * @offset: offset at which to read data
- *
- */
-static ssize_t
-v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
- loff_t *offset)
-{
- if (filp->f_flags & O_DIRECT)
- return v9fs_direct_read(filp, data, count, offset);
- return new_sync_read(filp, data, count, offset);
-}
-
/**
* v9fs_mmap_file_read - read from a file
* @filp: file pointer to read
@@ -704,84 +571,12 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
*
*/
static ssize_t
-v9fs_mmap_file_read(struct file *filp, char __user *data, size_t count,
- loff_t *offset)
+v9fs_mmap_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
/* TODO: Check if there are dirty pages */
- return v9fs_file_read(filp, data, count, offset);
-}
-
-static ssize_t
-v9fs_direct_write(struct file *filp, const char __user * data,
- size_t count, loff_t *offsetp)
-{
- loff_t offset;
- ssize_t retval;
- struct inode *inode;
- struct address_space *mapping;
-
- offset = *offsetp;
- mapping = filp->f_mapping;
- inode = mapping->host;
- if (!count)
- return 0;
-
- mutex_lock(&inode->i_mutex);
- retval = filemap_write_and_wait_range(mapping, offset,
- offset + count - 1);
- if (retval)
- goto err_out;
- /*
- * After a write we want buffered reads to be sure to go to disk to get
- * the new data. We invalidate clean cached page from the region we're
- * about to write. We do this *before* the write so that if we fail
- * here we fall back to buffered write
- */
- if (mapping->nrpages) {
- pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
- pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
-
- retval = invalidate_inode_pages2_range(mapping,
- pg_start, pg_end);
- /*
- * If a page can not be invalidated, fall back
- * to buffered write.
- */
- if (retval) {
- if (retval == -EBUSY)
- goto buff_write;
- goto err_out;
- }
- }
- retval = v9fs_file_write(filp, data, count, offsetp);
-err_out:
- mutex_unlock(&inode->i_mutex);
- return retval;
-
-buff_write:
- mutex_unlock(&inode->i_mutex);
- return new_sync_write(filp, data, count, offsetp);
-}
-
-/**
- * v9fs_cached_file_write - write to a file
- * @filp: file pointer to write
- * @data: data buffer to write data from
- * @count: size of buffer
- * @offset: offset at which to write data
- *
- */
-static ssize_t
-v9fs_cached_file_write(struct file *filp, const char __user * data,
- size_t count, loff_t *offset)
-{
-
- if (filp->f_flags & O_DIRECT)
- return v9fs_direct_write(filp, data, count, offset);
- return new_sync_write(filp, data, count, offset);
+ return v9fs_file_read_iter(iocb, to);
}
-
/**
* v9fs_mmap_file_write - write to a file
* @filp: file pointer to write
@@ -791,14 +586,13 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
*
*/
static ssize_t
-v9fs_mmap_file_write(struct file *filp, const char __user *data,
- size_t count, loff_t *offset)
+v9fs_mmap_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
/*
* TODO: invalidate mmaps on filp's inode between
* offset and offset+count
*/
- return v9fs_file_write(filp, data, count, offset);
+ return v9fs_file_write_iter(iocb, from);
}
static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
@@ -843,8 +637,6 @@ static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {
const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
- .read = v9fs_cached_file_read,
- .write = v9fs_cached_file_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.open = v9fs_file_open,
@@ -856,8 +648,6 @@ const struct file_operations v9fs_cached_file_operations = {
const struct file_operations v9fs_cached_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = v9fs_cached_file_read,
- .write = v9fs_cached_file_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.open = v9fs_file_open,
@@ -870,8 +660,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
const struct file_operations v9fs_file_operations = {
.llseek = generic_file_llseek,
- .read = v9fs_file_read,
- .write = v9fs_file_write,
+ .read_iter = v9fs_file_read_iter,
+ .write_iter = v9fs_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
@@ -881,8 +671,8 @@ const struct file_operations v9fs_file_operations = {
const struct file_operations v9fs_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = v9fs_file_read,
- .write = v9fs_file_write,
+ .read_iter = v9fs_file_read_iter,
+ .write_iter = v9fs_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
@@ -893,8 +683,8 @@ const struct file_operations v9fs_file_operations_dotl = {
const struct file_operations v9fs_mmap_file_operations = {
.llseek = generic_file_llseek,
- .read = v9fs_mmap_file_read,
- .write = v9fs_mmap_file_write,
+ .read_iter = v9fs_mmap_file_read_iter,
+ .write_iter = v9fs_mmap_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
@@ -904,8 +694,8 @@ const struct file_operations v9fs_mmap_file_operations = {
const struct file_operations v9fs_mmap_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = v9fs_mmap_file_read,
- .write = v9fs_mmap_file_write,
+ .read_iter = v9fs_mmap_file_read_iter,
+ .write_iter = v9fs_mmap_file_write_iter,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f95e01e058e4..0cf44b6cccd6 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/sched.h>
+#include <linux/uio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
@@ -25,50 +26,34 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
void *buffer, size_t buffer_size)
{
ssize_t retval;
- int msize, read_count;
- u64 offset = 0, attr_size;
+ u64 attr_size;
struct p9_fid *attr_fid;
+ struct kvec kvec = {.iov_base = buffer, .iov_len = buffer_size};
+ struct iov_iter to;
+ int err;
+
+ iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
if (IS_ERR(attr_fid)) {
retval = PTR_ERR(attr_fid);
p9_debug(P9_DEBUG_VFS, "p9_client_attrwalk failed %zd\n",
retval);
- attr_fid = NULL;
- goto error;
- }
- if (!buffer_size) {
- /* request to get the attr_size */
- retval = attr_size;
- goto error;
+ return retval;
}
if (attr_size > buffer_size) {
- retval = -ERANGE;
- goto error;
- }
- msize = attr_fid->clnt->msize;
- while (attr_size) {
- if (attr_size > (msize - P9_IOHDRSZ))
- read_count = msize - P9_IOHDRSZ;
+ if (!buffer_size) /* request to get the attr_size */
+ retval = attr_size;
else
- read_count = attr_size;
- read_count = p9_client_read(attr_fid, ((char *)buffer)+offset,
- NULL, offset, read_count);
- if (read_count < 0) {
- /* error in xattr read */
- retval = read_count;
- goto error;
- }
- offset += read_count;
- attr_size -= read_count;
+ retval = -ERANGE;
+ } else {
+ iov_iter_truncate(&to, attr_size);
+ retval = p9_client_read(attr_fid, 0, &to, &err);
+ if (err)
+ retval = err;
}
- /* Total read xattr bytes */
- retval = offset;
-error:
- if (attr_fid)
- p9_client_clunk(attr_fid);
+ p9_client_clunk(attr_fid);
return retval;
-
}
@@ -120,8 +105,11 @@ int v9fs_xattr_set(struct dentry *dentry, const char *name,
int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
const void *value, size_t value_len, int flags)
{
- u64 offset = 0;
- int retval, msize, write_count;
+ struct kvec kvec = {.iov_base = (void *)value, .iov_len = value_len};
+ struct iov_iter from;
+ int retval;
+
+ iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
name, value_len, flags);
@@ -135,29 +123,11 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
* On success fid points to xattr
*/
retval = p9_client_xattrcreate(fid, name, value_len, flags);
- if (retval < 0) {
+ if (retval < 0)
p9_debug(P9_DEBUG_VFS, "p9_client_xattrcreate failed %d\n",
retval);
- goto err;
- }
- msize = fid->clnt->msize;
- while (value_len) {
- if (value_len > (msize - P9_IOHDRSZ))
- write_count = msize - P9_IOHDRSZ;
- else
- write_count = value_len;
- write_count = p9_client_write(fid, ((char *)value)+offset,
- NULL, offset, write_count);
- if (write_count < 0) {
- /* error in xattr write */
- retval = write_count;
- goto err;
- }
- offset += write_count;
- value_len -= write_count;
- }
- retval = 0;
-err:
+ else
+ p9_client_write(fid, 0, &from, &retval);
p9_client_clunk(fid);
return retval;
}
diff --git a/fs/Kconfig b/fs/Kconfig
index ec35851e5b71..011f43365d7b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -32,6 +32,7 @@ source "fs/gfs2/Kconfig"
source "fs/ocfs2/Kconfig"
source "fs/btrfs/Kconfig"
source "fs/nilfs2/Kconfig"
+source "fs/f2fs/Kconfig"
config FS_DAX
bool "Direct Access (DAX) support"
@@ -217,7 +218,6 @@ source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
source "fs/ufs/Kconfig"
source "fs/exofs/Kconfig"
-source "fs/f2fs/Kconfig"
endif # MISC_FILESYSTEMS
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 270c48148f79..2d0cbbd14cfc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -27,9 +27,6 @@ config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
-config ARCH_BINFMT_ELF_RANDOMIZE_PIE
- bool
-
config ARCH_BINFMT_ELF_STATE
bool
diff --git a/fs/Makefile b/fs/Makefile
index a88ac4838c9e..cb92fd4c3172 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_HOSTFS) += hostfs/
obj-$(CONFIG_HPPFS) += hppfs/
obj-$(CONFIG_CACHEFILES) += cachefiles/
obj-$(CONFIG_DEBUG_FS) += debugfs/
+obj-$(CONFIG_TRACING) += tracefs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index f2ba88ab4aed..82d14cdf70f9 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -61,6 +61,7 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
kcalloc(size, sizeof(struct buffer_head *),
GFP_KERNEL);
if (!bh_fplus) {
+ ret = -ENOMEM;
adfs_error(sb, "not enough memory for"
" dir object %X (%d blocks)", id, size);
goto out;
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 07c9edce5aa7..46c0d5671cd5 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -23,11 +23,9 @@
const struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9852bdf34d76..a19c31d3f369 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -316,7 +316,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di
dm = kmalloc(nzones * sizeof(*dm), GFP_KERNEL);
if (dm == NULL) {
adfs_error(sb, "not enough memory");
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
for (zone = 0; zone < nzones; zone++, map_addr++) {
@@ -349,7 +349,7 @@ error_free:
brelse(dm[zone].dm_bh);
kfree(dm);
- return NULL;
+ return ERR_PTR(-EIO);
}
static inline unsigned long adfs_discsize(struct adfs_discrecord *dr, int block_bits)
@@ -370,6 +370,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
unsigned char *b_data;
struct adfs_sb_info *asb;
struct inode *root;
+ int ret = -EINVAL;
sb->s_flags |= MS_NODIRATIME;
@@ -391,6 +392,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
sb_set_blocksize(sb, BLOCK_SIZE);
if (!(bh = sb_bread(sb, ADFS_DISCRECORD / BLOCK_SIZE))) {
adfs_error(sb, "unable to read superblock");
+ ret = -EIO;
goto error;
}
@@ -400,6 +402,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
if (!silent)
printk("VFS: Can't find an adfs filesystem on dev "
"%s.\n", sb->s_id);
+ ret = -EINVAL;
goto error_free_bh;
}
@@ -412,6 +415,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
if (!silent)
printk("VPS: Can't find an adfs filesystem on dev "
"%s.\n", sb->s_id);
+ ret = -EINVAL;
goto error_free_bh;
}
@@ -421,11 +425,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
if (!bh) {
adfs_error(sb, "couldn't read superblock on "
"2nd try.");
+ ret = -EIO;
goto error;
}
b_data = bh->b_data + (ADFS_DISCRECORD % sb->s_blocksize);
if (adfs_checkbblk(b_data)) {
adfs_error(sb, "disc record mismatch, very weird!");
+ ret = -EINVAL;
goto error_free_bh;
}
dr = (struct adfs_discrecord *)(b_data + ADFS_DR_OFFSET);
@@ -433,6 +439,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
if (!silent)
printk(KERN_ERR "VFS: Unsupported blocksize on dev "
"%s.\n", sb->s_id);
+ ret = -EINVAL;
goto error;
}
@@ -447,10 +454,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
asb->s_size = adfs_discsize(dr, sb->s_blocksize_bits);
asb->s_version = dr->format_version;
asb->s_log2sharesize = dr->log2sharesize;
-
+
asb->s_map = adfs_read_map(sb, dr);
- if (!asb->s_map)
+ if (IS_ERR(asb->s_map)) {
+ ret = PTR_ERR(asb->s_map);
goto error_free_bh;
+ }
brelse(bh);
@@ -499,6 +508,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
brelse(asb->s_map[i].dm_bh);
kfree(asb->s_map);
adfs_error(sb, "get root inode failed\n");
+ ret = -EIO;
goto error;
}
return 0;
@@ -508,7 +518,7 @@ error_free_bh:
error:
sb->s_fs_info = NULL;
kfree(asb);
- return -EINVAL;
+ return ret;
}
static struct dentry *adfs_mount(struct file_system_type *fs_type,
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c8764bd7497d..cffe8370fb44 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,18 +106,22 @@ struct affs_sb_info {
spinlock_t work_lock; /* protects sb_work and work_queued */
};
-#define SF_INTL 0x0001 /* International filesystem. */
-#define SF_BM_VALID 0x0002 /* Bitmap is valid. */
-#define SF_IMMUTABLE 0x0004 /* Protection bits cannot be changed */
-#define SF_QUIET 0x0008 /* chmod errors will be not reported */
-#define SF_SETUID 0x0010 /* Ignore Amiga uid */
-#define SF_SETGID 0x0020 /* Ignore Amiga gid */
-#define SF_SETMODE 0x0040 /* Ignore Amiga protection bits */
-#define SF_MUFS 0x0100 /* Use MUFS uid/gid mapping */
-#define SF_OFS 0x0200 /* Old filesystem */
-#define SF_PREFIX 0x0400 /* Buffer for prefix is allocated */
-#define SF_VERBOSE 0x0800 /* Talk about fs when mounting */
-#define SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */
+#define AFFS_MOUNT_SF_INTL 0x0001 /* International filesystem. */
+#define AFFS_MOUNT_SF_BM_VALID 0x0002 /* Bitmap is valid. */
+#define AFFS_MOUNT_SF_IMMUTABLE 0x0004 /* Protection bits cannot be changed */
+#define AFFS_MOUNT_SF_QUIET 0x0008 /* chmod errors will be not reported */
+#define AFFS_MOUNT_SF_SETUID 0x0010 /* Ignore Amiga uid */
+#define AFFS_MOUNT_SF_SETGID 0x0020 /* Ignore Amiga gid */
+#define AFFS_MOUNT_SF_SETMODE 0x0040 /* Ignore Amiga protection bits */
+#define AFFS_MOUNT_SF_MUFS 0x0100 /* Use MUFS uid/gid mapping */
+#define AFFS_MOUNT_SF_OFS 0x0200 /* Old filesystem */
+#define AFFS_MOUNT_SF_PREFIX 0x0400 /* Buffer for prefix is allocated */
+#define AFFS_MOUNT_SF_VERBOSE 0x0800 /* Talk about fs when mounting */
+#define AFFS_MOUNT_SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */
+
+#define affs_clear_opt(o, opt) (o &= ~AFFS_MOUNT_##opt)
+#define affs_set_opt(o, opt) (o |= AFFS_MOUNT_##opt)
+#define affs_test_opt(o, opt) ((o) & AFFS_MOUNT_##opt)
/* short cut to get to the affs specific sb data */
static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 388da1ea815d..5022ac96aa40 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -472,7 +472,8 @@ bool
affs_nofilenametruncate(const struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
- return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE;
+
+ return affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_NO_TRUNCATE);
}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf95669..659c579c4588 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
* affs regular file handling primitives
*/
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "affs.h"
static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -389,8 +389,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to)
}
static ssize_t
-affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
+affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -398,15 +397,15 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
ssize_t ret;
- if (rw == WRITE) {
+ if (iov_iter_rw(iter) == WRITE) {
loff_t size = offset + count;
if (AFFS_I(inode)->mmu_private < size)
return 0;
}
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block);
- if (ret < 0 && (rw & WRITE))
+ ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block);
+ if (ret < 0 && iov_iter_rw(iter) == WRITE)
affs_write_failed(mapping, offset + count);
return ret;
}
@@ -699,8 +698,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
boff = tmp % bsize;
if (boff) {
bh = affs_bread_ino(inode, bidx, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
+ if (IS_ERR(bh)) {
+ written = PTR_ERR(bh);
+ goto err_first_bh;
+ }
tmp = min(bsize - boff, to - from);
BUG_ON(boff + tmp > bsize || tmp > bsize);
memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +713,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
bidx++;
} else if (bidx) {
bh = affs_bread_ino(inode, bidx - 1, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
+ if (IS_ERR(bh)) {
+ written = PTR_ERR(bh);
+ goto err_first_bh;
+ }
}
while (from + bsize <= to) {
prev_bh = bh;
bh = affs_getemptyblk_ino(inode, bidx);
if (IS_ERR(bh))
- goto out;
+ goto err_bh;
memcpy(AFFS_DATA(bh), data + from, bsize);
if (buffer_new(bh)) {
AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +754,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
prev_bh = bh;
bh = affs_bread_ino(inode, bidx, 1);
if (IS_ERR(bh))
- goto out;
+ goto err_bh;
tmp = min(bsize, to - from);
BUG_ON(tmp > bsize);
memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +793,13 @@ done:
if (tmp > inode->i_size)
inode->i_size = AFFS_I(inode)->mmu_private = tmp;
+err_first_bh:
unlock_page(page);
page_cache_release(page);
return written;
-out:
+err_bh:
bh = prev_bh;
if (!written)
written = PTR_ERR(bh);
@@ -910,7 +914,7 @@ affs_truncate(struct inode *inode)
if (inode->i_size) {
AFFS_I(inode)->i_blkcnt = last_blk + 1;
AFFS_I(inode)->i_extcnt = ext + 1;
- if (AFFS_SB(sb)->s_flags & SF_OFS) {
+ if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_OFS)) {
struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
u32 tmp;
if (IS_ERR(bh)) {
@@ -964,9 +968,7 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
}
const struct file_operations affs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.open = affs_file_open,
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 6f34510449e8..9628003ccd2f 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -66,23 +66,23 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
AFFS_I(inode)->i_lastalloc = 0;
AFFS_I(inode)->i_pa_cnt = 0;
- if (sbi->s_flags & SF_SETMODE)
+ if (affs_test_opt(sbi->s_flags, SF_SETMODE))
inode->i_mode = sbi->s_mode;
else
inode->i_mode = prot_to_mode(prot);
id = be16_to_cpu(tail->uid);
- if (id == 0 || sbi->s_flags & SF_SETUID)
+ if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
inode->i_uid = sbi->s_uid;
- else if (id == 0xFFFF && sbi->s_flags & SF_MUFS)
+ else if (id == 0xFFFF && affs_test_opt(sbi->s_flags, SF_MUFS))
i_uid_write(inode, 0);
else
i_uid_write(inode, id);
id = be16_to_cpu(tail->gid);
- if (id == 0 || sbi->s_flags & SF_SETGID)
+ if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETGID))
inode->i_gid = sbi->s_gid;
- else if (id == 0xFFFF && sbi->s_flags & SF_MUFS)
+ else if (id == 0xFFFF && affs_test_opt(sbi->s_flags, SF_MUFS))
i_gid_write(inode, 0);
else
i_gid_write(inode, id);
@@ -94,7 +94,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
/* fall through */
case ST_USERDIR:
if (be32_to_cpu(tail->stype) == ST_USERDIR ||
- sbi->s_flags & SF_SETMODE) {
+ affs_test_opt(sbi->s_flags, SF_SETMODE)) {
if (inode->i_mode & S_IRUSR)
inode->i_mode |= S_IXUSR;
if (inode->i_mode & S_IRGRP)
@@ -133,7 +133,8 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
}
if (tail->link_chain)
set_nlink(inode, 2);
- inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
+ inode->i_mapping->a_ops = affs_test_opt(sbi->s_flags, SF_OFS) ?
+ &affs_aops_ofs : &affs_aops;
inode->i_op = &affs_file_inode_operations;
inode->i_fop = &affs_file_operations;
break;
@@ -190,15 +191,15 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
uid = i_uid_read(inode);
gid = i_gid_read(inode);
- if (AFFS_SB(sb)->s_flags & SF_MUFS) {
+ if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_MUFS)) {
if (uid == 0 || uid == 0xFFFF)
uid = uid ^ ~0;
if (gid == 0 || gid == 0xFFFF)
gid = gid ^ ~0;
}
- if (!(AFFS_SB(sb)->s_flags & SF_SETUID))
+ if (!affs_test_opt(AFFS_SB(sb)->s_flags, SF_SETUID))
tail->uid = cpu_to_be16(uid);
- if (!(AFFS_SB(sb)->s_flags & SF_SETGID))
+ if (!affs_test_opt(AFFS_SB(sb)->s_flags, SF_SETGID))
tail->gid = cpu_to_be16(gid);
}
}
@@ -221,11 +222,14 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
if (error)
goto out;
- if (((attr->ia_valid & ATTR_UID) && (AFFS_SB(inode->i_sb)->s_flags & SF_SETUID)) ||
- ((attr->ia_valid & ATTR_GID) && (AFFS_SB(inode->i_sb)->s_flags & SF_SETGID)) ||
+ if (((attr->ia_valid & ATTR_UID) &&
+ affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_SETUID)) ||
+ ((attr->ia_valid & ATTR_GID) &&
+ affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_SETGID)) ||
((attr->ia_valid & ATTR_MODE) &&
- (AFFS_SB(inode->i_sb)->s_flags & (SF_SETMODE | SF_IMMUTABLE)))) {
- if (!(AFFS_SB(inode->i_sb)->s_flags & SF_QUIET))
+ (AFFS_SB(inode->i_sb)->s_flags &
+ (AFFS_MOUNT_SF_SETMODE | AFFS_MOUNT_SF_IMMUTABLE)))) {
+ if (!affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_QUIET))
error = -EPERM;
goto out;
}
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index ffb7bd82c2a5..ec8ca0efb960 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -53,7 +53,8 @@ affs_intl_toupper(int ch)
static inline toupper_t
affs_get_toupper(struct super_block *sb)
{
- return AFFS_SB(sb)->s_flags & SF_INTL ? affs_intl_toupper : affs_toupper;
+ return affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL) ?
+ affs_intl_toupper : affs_toupper;
}
/*
@@ -275,7 +276,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
inode->i_op = &affs_file_inode_operations;
inode->i_fop = &affs_file_operations;
- inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
+ inode->i_mapping->a_ops = affs_test_opt(AFFS_SB(sb)->s_flags, SF_OFS) ?
+ &affs_aops_ofs : &affs_aops;
error = affs_add_entry(dir, inode, dentry, ST_FILE);
if (error) {
clear_nlink(inode);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 4cf0e9113fb6..3f89c9e05b40 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -227,22 +227,22 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
if (match_octal(&args[0], &option))
return 0;
*mode = option & 0777;
- *mount_opts |= SF_SETMODE;
+ affs_set_opt(*mount_opts, SF_SETMODE);
break;
case Opt_mufs:
- *mount_opts |= SF_MUFS;
+ affs_set_opt(*mount_opts, SF_MUFS);
break;
case Opt_notruncate:
- *mount_opts |= SF_NO_TRUNCATE;
+ affs_set_opt(*mount_opts, SF_NO_TRUNCATE);
break;
case Opt_prefix:
*prefix = match_strdup(&args[0]);
if (!*prefix)
return 0;
- *mount_opts |= SF_PREFIX;
+ affs_set_opt(*mount_opts, SF_PREFIX);
break;
case Opt_protect:
- *mount_opts |= SF_IMMUTABLE;
+ affs_set_opt(*mount_opts, SF_IMMUTABLE);
break;
case Opt_reserved:
if (match_int(&args[0], reserved))
@@ -258,7 +258,7 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
*gid = make_kgid(current_user_ns(), option);
if (!gid_valid(*gid))
return 0;
- *mount_opts |= SF_SETGID;
+ affs_set_opt(*mount_opts, SF_SETGID);
break;
case Opt_setuid:
if (match_int(&args[0], &option))
@@ -266,10 +266,10 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved,
*uid = make_kuid(current_user_ns(), option);
if (!uid_valid(*uid))
return 0;
- *mount_opts |= SF_SETUID;
+ affs_set_opt(*mount_opts, SF_SETUID);
break;
case Opt_verbose:
- *mount_opts |= SF_VERBOSE;
+ affs_set_opt(*mount_opts, SF_VERBOSE);
break;
case Opt_volume: {
char *vol = match_strdup(&args[0]);
@@ -435,30 +435,31 @@ got_root:
case MUFS_FS:
case MUFS_INTLFFS:
case MUFS_DCFFS:
- sbi->s_flags |= SF_MUFS;
+ affs_set_opt(sbi->s_flags, SF_MUFS);
/* fall thru */
case FS_INTLFFS:
case FS_DCFFS:
- sbi->s_flags |= SF_INTL;
+ affs_set_opt(sbi->s_flags, SF_INTL);
break;
case MUFS_FFS:
- sbi->s_flags |= SF_MUFS;
+ affs_set_opt(sbi->s_flags, SF_MUFS);
break;
case FS_FFS:
break;
case MUFS_OFS:
- sbi->s_flags |= SF_MUFS;
+ affs_set_opt(sbi->s_flags, SF_MUFS);
/* fall thru */
case FS_OFS:
- sbi->s_flags |= SF_OFS;
+ affs_set_opt(sbi->s_flags, SF_OFS);
sb->s_flags |= MS_NOEXEC;
break;
case MUFS_DCOFS:
case MUFS_INTLOFS:
- sbi->s_flags |= SF_MUFS;
+ affs_set_opt(sbi->s_flags, SF_MUFS);
case FS_DCOFS:
case FS_INTLOFS:
- sbi->s_flags |= SF_INTL | SF_OFS;
+ affs_set_opt(sbi->s_flags, SF_INTL);
+ affs_set_opt(sbi->s_flags, SF_OFS);
sb->s_flags |= MS_NOEXEC;
break;
default:
@@ -467,7 +468,7 @@ got_root:
return -EINVAL;
}
- if (mount_flags & SF_VERBOSE) {
+ if (affs_test_opt(mount_flags, SF_VERBOSE)) {
u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0];
pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
len > 31 ? 31 : len,
@@ -478,7 +479,7 @@ got_root:
sb->s_flags |= MS_NODEV | MS_NOSUID;
sbi->s_data_blksize = sb->s_blocksize;
- if (sbi->s_flags & SF_OFS)
+ if (affs_test_opt(sbi->s_flags, SF_OFS))
sbi->s_data_blksize -= 24;
tmp_flags = sb->s_flags;
@@ -493,7 +494,7 @@ got_root:
if (IS_ERR(root_inode))
return PTR_ERR(root_inode);
- if (AFFS_SB(sb)->s_flags & SF_INTL)
+ if (affs_test_opt(AFFS_SB(sb)->s_flags, SF_INTL))
sb->s_d_op = &affs_intl_dentry_operations;
else
sb->s_d_op = &affs_dentry_operations;
@@ -520,10 +521,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
int root_block;
unsigned long mount_flags;
int res = 0;
- char *new_opts = kstrdup(data, GFP_KERNEL);
+ char *new_opts;
char volume[32];
char *prefix = NULL;
+ new_opts = kstrdup(data, GFP_KERNEL);
+ if (!new_opts)
+ return -ENOMEM;
+
pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
sync_filesystem(sb);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 932ce07948b3..999bc3caec92 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -31,8 +31,6 @@ const struct file_operations afs_file_operations = {
.open = afs_open,
.release = afs_release,
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = afs_file_write,
.mmap = generic_file_readonly_mmap,
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 0dd4dafee10b..91ea1aa0d8b3 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -22,9 +22,12 @@
int afs_abort_to_error(u32 abort_code)
{
switch (abort_code) {
+ /* low errno codes inserted into abort namespace */
case 13: return -EACCES;
case 27: return -EFBIG;
case 30: return -EROFS;
+
+ /* VICE "special error" codes; 101 - 111 */
case VSALVAGE: return -EIO;
case VNOVNODE: return -ENOENT;
case VNOVOL: return -ENOMEDIUM;
@@ -36,11 +39,18 @@ int afs_abort_to_error(u32 abort_code)
case VOVERQUOTA: return -EDQUOT;
case VBUSY: return -EBUSY;
case VMOVED: return -ENXIO;
- case 0x2f6df0a: return -EWOULDBLOCK;
+
+ /* Unified AFS error table; ET "uae" == 0x2f6df00 */
+ case 0x2f6df00: return -EPERM;
+ case 0x2f6df01: return -ENOENT;
+ case 0x2f6df04: return -EIO;
+ case 0x2f6df0a: return -EAGAIN;
+ case 0x2f6df0b: return -ENOMEM;
case 0x2f6df0c: return -EACCES;
case 0x2f6df0f: return -EBUSY;
case 0x2f6df10: return -EEXIST;
case 0x2f6df11: return -EXDEV;
+ case 0x2f6df12: return -ENODEV;
case 0x2f6df13: return -ENOTDIR;
case 0x2f6df14: return -EISDIR;
case 0x2f6df15: return -EINVAL;
@@ -54,8 +64,12 @@ int afs_abort_to_error(u32 abort_code)
case 0x2f6df23: return -ENAMETOOLONG;
case 0x2f6df24: return -ENOLCK;
case 0x2f6df26: return -ENOTEMPTY;
+ case 0x2f6df28: return -EWOULDBLOCK;
+ case 0x2f6df69: return -ENOTCONN;
+ case 0x2f6df6c: return -ETIMEDOUT;
case 0x2f6df78: return -EDQUOT;
+ /* RXKAD abort codes; from include/rxrpc/packet.h. ET "RXK" == 0x1260B00 */
case RXKADINCONSISTENCY: return -EPROTO;
case RXKADPACKETSHORT: return -EPROTO;
case RXKADLEVELFAIL: return -EKEYREJECTED;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index dbc732e9a5c0..3a57a1b0fb51 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -770,15 +770,12 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
void afs_send_empty_reply(struct afs_call *call)
{
struct msghdr msg;
- struct kvec iov[1];
_enter("");
- iov[0].iov_base = NULL;
- iov[0].iov_len = 0;
msg.msg_name = NULL;
msg.msg_namelen = 0;
- iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 0, 0); /* WTF? */
+ iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08964ed..0714abcd7f32 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
-#include <linux/aio.h>
#include "internal.h"
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index f8e52a1854c1..480440f4701f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -77,6 +77,11 @@ struct kioctx_cpu {
unsigned reqs_available;
};
+struct ctx_rq_wait {
+ struct completion comp;
+ atomic_t count;
+};
+
struct kioctx {
struct percpu_ref users;
atomic_t dead;
@@ -115,7 +120,7 @@ struct kioctx {
/*
* signals when all in-flight requests are done
*/
- struct completion *requests_done;
+ struct ctx_rq_wait *rq_wait;
struct {
/*
@@ -151,6 +156,38 @@ struct kioctx {
unsigned id;
};
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED ((void *) (~0ULL))
+
+struct aio_kiocb {
+ struct kiocb common;
+
+ struct kioctx *ki_ctx;
+ kiocb_cancel_fn *ki_cancel;
+
+ struct iocb __user *ki_user_iocb; /* user's aiocb */
+ __u64 ki_user_data; /* user's data for completion */
+
+ struct list_head ki_list; /* the aio core uses this
+ * for cancellation */
+
+ /*
+ * If the aio_resfd field of the userspace iocb is not zero,
+ * this is the underlying eventfd context to deliver events to.
+ */
+ struct eventfd_ctx *ki_eventfd;
+};
+
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
unsigned long aio_nr; /* current system wide number of aio requests */
@@ -220,7 +257,7 @@ static int __init aio_setup(void)
if (IS_ERR(aio_mnt))
panic("Failed to create aio fs mount.");
- kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -278,11 +315,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
struct kioctx_table *table;
- int i;
+ int i, res = -EINVAL;
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
@@ -292,13 +329,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
ctx = table->table[i];
if (ctx && ctx->aio_ring_file == file) {
- ctx->user_id = ctx->mmap_base = vma->vm_start;
+ if (!atomic_read(&ctx->dead)) {
+ ctx->user_id = ctx->mmap_base = vma->vm_start;
+ res = 0;
+ }
break;
}
}
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
+ return res;
}
static const struct file_operations aio_ring_fops = {
@@ -480,8 +521,9 @@ static int aio_setup_ring(struct kioctx *ctx)
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
+ struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
@@ -496,7 +538,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
{
kiocb_cancel_fn *old, *cancel;
@@ -514,7 +556,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
} while (cancel != old);
- return cancel(kiocb);
+ return cancel(&kiocb->common);
}
static void free_ioctx(struct work_struct *work)
@@ -535,8 +577,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
/* At this point we know that there are no any in-flight requests */
- if (ctx->requests_done)
- complete(ctx->requests_done);
+ if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
+ complete(&ctx->rq_wait->comp);
INIT_WORK(&ctx->free_work, free_ioctx);
schedule_work(&ctx->free_work);
@@ -550,13 +592,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
static void free_ioctx_users(struct percpu_ref *ref)
{
struct kioctx *ctx = container_of(ref, struct kioctx, users);
- struct kiocb *req;
+ struct aio_kiocb *req;
spin_lock_irq(&ctx->ctx_lock);
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
- struct kiocb, ki_list);
+ struct aio_kiocb, ki_list);
list_del_init(&req->ki_list);
kiocb_cancel(req);
@@ -655,8 +697,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
nr_events *= 2;
/* Prevent overflows */
- if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
- (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
+ if (nr_events > (0x10000000U / sizeof(struct io_event))) {
pr_debug("ENOMEM: nr_events too high\n");
return ERR_PTR(-EINVAL);
}
@@ -727,6 +768,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
err_cleanup:
aio_nr_sub(ctx->max_reqs);
err_ctx:
+ atomic_set(&ctx->dead, 1);
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
aio_free_ring(ctx);
err:
mutex_unlock(&ctx->ring_lock);
@@ -744,15 +788,16 @@ err:
* the rapid destruction of the kioctx.
*/
static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
- struct completion *requests_done)
+ struct ctx_rq_wait *wait)
{
struct kioctx_table *table;
- if (atomic_xchg(&ctx->dead, 1))
+ spin_lock(&mm->ioctx_lock);
+ if (atomic_xchg(&ctx->dead, 1)) {
+ spin_unlock(&mm->ioctx_lock);
return -EINVAL;
+ }
-
- spin_lock(&mm->ioctx_lock);
table = rcu_dereference_raw(mm->ioctx_table);
WARN_ON(ctx != table->table[ctx->id]);
table->table[ctx->id] = NULL;
@@ -773,27 +818,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
if (ctx->mmap_size)
vm_munmap(ctx->mmap_base, ctx->mmap_size);
- ctx->requests_done = requests_done;
+ ctx->rq_wait = wait;
percpu_ref_kill(&ctx->users);
return 0;
}
-/* wait_on_sync_kiocb:
- * Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
- while (!req->ki_ctx) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (req->ki_ctx)
- break;
- io_schedule();
- }
- __set_current_state(TASK_RUNNING);
- return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
/*
* exit_aio: called when the last user of mm goes away. At this point, there is
* no way for any new requests to be submited or any of the io_* syscalls to be
@@ -805,18 +834,24 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
void exit_aio(struct mm_struct *mm)
{
struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
- int i;
+ struct ctx_rq_wait wait;
+ int i, skipped;
if (!table)
return;
+ atomic_set(&wait.count, table->nr);
+ init_completion(&wait.comp);
+
+ skipped = 0;
for (i = 0; i < table->nr; ++i) {
struct kioctx *ctx = table->table[i];
- struct completion requests_done =
- COMPLETION_INITIALIZER_ONSTACK(requests_done);
- if (!ctx)
+ if (!ctx) {
+ skipped++;
continue;
+ }
+
/*
* We don't need to bother with munmap() here - exit_mmap(mm)
* is coming and it'll unmap everything. And we simply can't,
@@ -825,10 +860,12 @@ void exit_aio(struct mm_struct *mm)
* that it needs to unmap the area, just set it to 0.
*/
ctx->mmap_size = 0;
- kill_ioctx(mm, ctx, &requests_done);
+ kill_ioctx(mm, ctx, &wait);
+ }
+ if (!atomic_sub_and_test(skipped, &wait.count)) {
/* Wait until all IO for the context are done. */
- wait_for_completion(&requests_done);
+ wait_for_completion(&wait.comp);
}
RCU_INIT_POINTER(mm->ioctx_table, NULL);
@@ -948,9 +985,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
* Allocate a slot for an aio request.
* Returns NULL if no requests are free.
*/
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
{
- struct kiocb *req;
+ struct aio_kiocb *req;
if (!get_reqs_available(ctx)) {
user_refill_reqs_available(ctx);
@@ -971,10 +1008,10 @@ out_put:
return NULL;
}
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
{
- if (req->ki_filp)
- fput(req->ki_filp);
+ if (req->common.ki_filp)
+ fput(req->common.ki_filp);
if (req->ki_eventfd != NULL)
eventfd_ctx_put(req->ki_eventfd);
kmem_cache_free(kiocb_cachep, req);
@@ -1010,8 +1047,9 @@ out:
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
{
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
@@ -1025,13 +1063,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
* ref, no other paths have a way to get another ref
* - the sync task helpfully left a reference to itself in the iocb
*/
- if (is_sync_kiocb(iocb)) {
- iocb->ki_user_data = res;
- smp_wmb();
- iocb->ki_ctx = ERR_PTR(-EXDEV);
- wake_up_process(iocb->ki_obj.tsk);
- return;
- }
+ BUG_ON(is_sync_kiocb(kiocb));
if (iocb->ki_list.next) {
unsigned long flags;
@@ -1057,7 +1089,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+ event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
event->data = iocb->ki_user_data;
event->res = res;
event->res2 = res2;
@@ -1066,7 +1098,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+ ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
res, res2);
/* after flagging the request as done, we
@@ -1113,7 +1145,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
percpu_ref_put(&ctx->reqs);
}
-EXPORT_SYMBOL(aio_complete);
/* aio_read_events_ring
* Pull an event off of the ioctx's event ring. Returns the number of
@@ -1313,15 +1344,17 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
- struct completion requests_done =
- COMPLETION_INITIALIZER_ONSTACK(requests_done);
+ struct ctx_rq_wait wait;
int ret;
+ init_completion(&wait.comp);
+ atomic_set(&wait.count, 1);
+
/* Pass requests_done to kill_ioctx() where it can be set
* in a thread-safe way. If we try to set it here then we have
* a race condition if two io_destroy() called simultaneously.
*/
- ret = kill_ioctx(current->mm, ioctx, &requests_done);
+ ret = kill_ioctx(current->mm, ioctx, &wait);
percpu_ref_put(&ioctx->users);
/* Wait until all IO for the context are done. Otherwise kernel
@@ -1329,7 +1362,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
* is destroyed.
*/
if (!ret)
- wait_for_completion(&requests_done);
+ wait_for_completion(&wait.comp);
return ret;
}
@@ -1337,50 +1370,21 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
return -EINVAL;
}
-typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
- unsigned long, loff_t);
typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
-static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
- int rw, char __user *buf,
- unsigned long *nr_segs,
- struct iovec **iovec,
- bool compat)
+static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
+ struct iovec **iovec,
+ bool compat,
+ struct iov_iter *iter)
{
- ssize_t ret;
-
- *nr_segs = kiocb->ki_nbytes;
-
#ifdef CONFIG_COMPAT
if (compat)
- ret = compat_rw_copy_check_uvector(rw,
+ return compat_import_iovec(rw,
(struct compat_iovec __user *)buf,
- *nr_segs, UIO_FASTIOV, *iovec, iovec);
- else
+ len, UIO_FASTIOV, iovec, iter);
#endif
- ret = rw_copy_check_uvector(rw,
- (struct iovec __user *)buf,
- *nr_segs, UIO_FASTIOV, *iovec, iovec);
- if (ret < 0)
- return ret;
-
- /* ki_nbytes now reflect bytes instead of segs */
- kiocb->ki_nbytes = ret;
- return 0;
-}
-
-static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
- int rw, char __user *buf,
- unsigned long *nr_segs,
- struct iovec *iovec)
-{
- if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
- return -EFAULT;
-
- iovec->iov_base = buf;
- iovec->iov_len = kiocb->ki_nbytes;
- *nr_segs = 1;
- return 0;
+ return import_iovec(rw, (struct iovec __user *)buf,
+ len, UIO_FASTIOV, iovec, iter);
}
/*
@@ -1388,14 +1392,12 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
* Performs the initial checks and io submission.
*/
static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
- char __user *buf, bool compat)
+ char __user *buf, size_t len, bool compat)
{
struct file *file = req->ki_filp;
ssize_t ret;
- unsigned long nr_segs;
int rw;
fmode_t mode;
- aio_rw_op *rw_op;
rw_iter_op *iter_op;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
@@ -1405,7 +1407,6 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
case IOCB_CMD_PREADV:
mode = FMODE_READ;
rw = READ;
- rw_op = file->f_op->aio_read;
iter_op = file->f_op->read_iter;
goto rw_common;
@@ -1413,51 +1414,40 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
case IOCB_CMD_PWRITEV:
mode = FMODE_WRITE;
rw = WRITE;
- rw_op = file->f_op->aio_write;
iter_op = file->f_op->write_iter;
goto rw_common;
rw_common:
if (unlikely(!(file->f_mode & mode)))
return -EBADF;
- if (!rw_op && !iter_op)
+ if (!iter_op)
return -EINVAL;
- ret = (opcode == IOCB_CMD_PREADV ||
- opcode == IOCB_CMD_PWRITEV)
- ? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
- &iovec, compat)
- : aio_setup_single_vector(req, rw, buf, &nr_segs,
- iovec);
+ if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+ ret = aio_setup_vectored_rw(rw, buf, len,
+ &iovec, compat, &iter);
+ else {
+ ret = import_single_range(rw, buf, len, iovec, &iter);
+ iovec = NULL;
+ }
if (!ret)
- ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+ ret = rw_verify_area(rw, file, &req->ki_pos,
+ iov_iter_count(&iter));
if (ret < 0) {
- if (iovec != inline_vecs)
- kfree(iovec);
+ kfree(iovec);
return ret;
}
- req->ki_nbytes = ret;
-
- /* XXX: move/kill - rw_verify_area()? */
- /* This matches the pread()/pwrite() logic */
- if (req->ki_pos < 0) {
- ret = -EINVAL;
- break;
- }
+ len = ret;
if (rw == WRITE)
file_start_write(file);
- if (iter_op) {
- iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
- ret = iter_op(req, &iter);
- } else {
- ret = rw_op(req, iovec, nr_segs, req->ki_pos);
- }
+ ret = iter_op(req, &iter);
if (rw == WRITE)
file_end_write(file);
+ kfree(iovec);
break;
case IOCB_CMD_FDSYNC:
@@ -1479,9 +1469,6 @@ rw_common:
return -EINVAL;
}
- if (iovec != inline_vecs)
- kfree(iovec);
-
if (ret != -EIOCBQUEUED) {
/*
* There's no easy way to restart the syscall since other AIO's
@@ -1500,7 +1487,7 @@ rw_common:
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
struct iocb *iocb, bool compat)
{
- struct kiocb *req;
+ struct aio_kiocb *req;
ssize_t ret;
/* enforce forwards compatibility on users */
@@ -1523,11 +1510,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;
- req->ki_filp = fget(iocb->aio_fildes);
- if (unlikely(!req->ki_filp)) {
+ req->common.ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->common.ki_filp)) {
ret = -EBADF;
goto out_put_req;
}
+ req->common.ki_pos = iocb->aio_offset;
+ req->common.ki_complete = aio_complete;
+ req->common.ki_flags = iocb_flags(req->common.ki_filp);
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
@@ -1542,6 +1532,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_eventfd = NULL;
goto out_put_req;
}
+
+ req->common.ki_flags |= IOCB_EVENTFD;
}
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1550,13 +1542,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
}
- req->ki_obj.user = user_iocb;
+ req->ki_user_iocb = user_iocb;
req->ki_user_data = iocb->aio_data;
- req->ki_pos = iocb->aio_offset;
- req->ki_nbytes = iocb->aio_nbytes;
- ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+ ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
(char __user *)(unsigned long)iocb->aio_buf,
+ iocb->aio_nbytes,
compat);
if (ret)
goto out_put_req;
@@ -1643,10 +1634,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
/* lookup_kiocb
* Finds a given iocb for cancellation.
*/
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
- u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
{
- struct list_head *pos;
+ struct aio_kiocb *kiocb;
assert_spin_locked(&ctx->ctx_lock);
@@ -1654,9 +1645,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
return NULL;
/* TODO: use a hash or array, this sucks. */
- list_for_each(pos, &ctx->active_reqs) {
- struct kiocb *kiocb = list_kiocb(pos);
- if (kiocb->ki_obj.user == iocb)
+ list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+ if (kiocb->ki_user_iocb == iocb)
return kiocb;
}
return NULL;
@@ -1676,7 +1666,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
{
struct kioctx *ctx;
- struct kiocb *kiocb;
+ struct aio_kiocb *kiocb;
u32 key;
int ret;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 8e98cf954bab..d10e619632ab 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -213,7 +213,7 @@ void autofs4_clean_ino(struct autofs_info *);
static inline int autofs_prepare_pipe(struct file *pipe)
{
- if (!pipe->f_op->write)
+ if (!(pipe->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
if (!S_ISFIFO(file_inode(pipe)->i_mode))
return -EINVAL;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 116fd38ee472..2ad05ab93db8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -70,7 +70,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
mutex_lock(&sbi->pipe_mutex);
while (bytes &&
- (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
+ (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) {
data += wr;
bytes -= wr;
}
diff --git a/fs/befs/befs.h b/fs/befs/befs.h
index 3a7813ab8c95..1fead8d56a98 100644
--- a/fs/befs/befs.h
+++ b/fs/befs/befs.h
@@ -19,16 +19,16 @@ typedef u64 befs_blocknr_t;
* BeFS in memory structures
*/
-typedef struct befs_mount_options {
+struct befs_mount_options {
kgid_t gid;
kuid_t uid;
int use_gid;
int use_uid;
int debug;
char *iocharset;
-} befs_mount_options;
+};
-typedef struct befs_sb_info {
+struct befs_sb_info {
u32 magic1;
u32 block_size;
u32 block_shift;
@@ -52,12 +52,11 @@ typedef struct befs_sb_info {
befs_inode_addr indices;
u32 magic3;
- befs_mount_options mount_opts;
+ struct befs_mount_options mount_opts;
struct nls_table *nls;
+};
-} befs_sb_info;
-
-typedef struct befs_inode_info {
+struct befs_inode_info {
u32 i_flags;
u32 i_type;
@@ -71,8 +70,7 @@ typedef struct befs_inode_info {
} i_data;
struct inode vfs_inode;
-
-} befs_inode_info;
+};
enum befs_err {
BEFS_OK,
@@ -105,13 +103,13 @@ void befs_dump_index_node(const struct super_block *sb, befs_btree_nodehead *);
/* Gets a pointer to the private portion of the super_block
* structure from the public part
*/
-static inline befs_sb_info *
+static inline struct befs_sb_info *
BEFS_SB(const struct super_block *super)
{
- return (befs_sb_info *) super->s_fs_info;
+ return (struct befs_sb_info *) super->s_fs_info;
}
-static inline befs_inode_info *
+static inline struct befs_inode_info *
BEFS_I(const struct inode *inode)
{
return list_entry(inode, struct befs_inode_info, vfs_inode);
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c
index 1e8e0b8d8836..ebd50718659f 100644
--- a/fs/befs/datastream.c
+++ b/fs/befs/datastream.c
@@ -168,7 +168,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds)
befs_blocknr_t blocks;
befs_blocknr_t datablocks; /* File data blocks */
befs_blocknr_t metablocks; /* FS metadata blocks */
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
befs_debug(sb, "---> %s", __func__);
@@ -428,7 +428,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
struct buffer_head *indir_block;
befs_block_run indir_run;
befs_disk_inode_addr *iaddr_array = NULL;
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
befs_blocknr_t indir_start_blk =
data->max_indirect_range >> befs_sb->block_shift;
diff --git a/fs/befs/io.c b/fs/befs/io.c
index 0408a3d601d0..7a5b4ec21c56 100644
--- a/fs/befs/io.c
+++ b/fs/befs/io.c
@@ -28,7 +28,7 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
{
struct buffer_head *bh = NULL;
befs_blocknr_t block = 0;
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
befs_debug(sb, "---> Enter %s "
"[%u, %hu, %hu]", __func__, iaddr.allocation_group,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e089f1985fca..16e0a48bfccd 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -51,7 +51,7 @@ static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
static void befs_put_super(struct super_block *);
static int befs_remount(struct super_block *, int *, char *);
static int befs_statfs(struct dentry *, struct kstatfs *);
-static int parse_options(char *, befs_mount_options *);
+static int parse_options(char *, struct befs_mount_options *);
static const struct super_operations befs_sops = {
.alloc_inode = befs_alloc_inode, /* allocate a new inode */
@@ -304,9 +304,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
{
struct buffer_head *bh = NULL;
befs_inode *raw_inode = NULL;
-
- befs_sb_info *befs_sb = BEFS_SB(sb);
- befs_inode_info *befs_ino = NULL;
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_inode_info *befs_ino = NULL;
struct inode *inode;
long ret = -EIO;
@@ -472,7 +471,7 @@ static void *
befs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct super_block *sb = dentry->d_sb;
- befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+ struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
befs_data_stream *data = &befs_ino->i_data.ds;
befs_off_t len = data->size;
char *link;
@@ -502,7 +501,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
static void *
befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+ struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+
nd_set_link(nd, befs_ino->i_data.symlink);
return NULL;
}
@@ -669,7 +669,7 @@ static const match_table_t befs_tokens = {
};
static int
-parse_options(char *options, befs_mount_options * opts)
+parse_options(char *options, struct befs_mount_options *opts)
{
char *p;
substring_t args[MAX_OPT_ARGS];
@@ -769,7 +769,7 @@ static int
befs_fill_super(struct super_block *sb, void *data, int silent)
{
struct buffer_head *bh;
- befs_sb_info *befs_sb;
+ struct befs_sb_info *befs_sb;
befs_super_block *disk_sb;
struct inode *root;
long ret = -EINVAL;
diff --git a/fs/befs/super.c b/fs/befs/super.c
index ca40f828f64d..aeafc4d84278 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -24,7 +24,7 @@
int
befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
{
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
/* Check the byte order of the filesystem */
if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE)
@@ -59,7 +59,7 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
int
befs_check_sb(struct super_block *sb)
{
- befs_sb_info *befs_sb = BEFS_SB(sb);
+ struct befs_sb_info *befs_sb = BEFS_SB(sb);
/* Check magic headers of super block */
if ((befs_sb->magic1 != BEFS_SUPER_MAGIC1)
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 08063ae0a17c..7a8182770649 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -86,7 +86,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode = new_inode(s);
if (!inode)
- return -ENOSPC;
+ return -ENOMEM;
mutex_lock(&info->bfs_lock);
ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1);
if (ino > info->si_lasti) {
@@ -293,7 +293,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name,
for (block = sblock; block <= eblock; block++) {
bh = sb_bread(dir->i_sb, block);
if (!bh)
- return -ENOSPC;
+ return -EIO;
for (off = 0; off < BFS_BSIZE; off += BFS_DIRENT_SIZE) {
de = (struct bfs_dirent *)(bh->b_data + off);
if (!de->ino) {
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index e7f88ace1a25..97f1b5160155 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -23,9 +23,7 @@
const struct file_operations bfs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079d9982..fdcb4d69f430 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/writeback.h>
+#include <linux/uio.h>
#include <asm/uaccess.h>
#include "bfs.h"
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 995986b8e36b..241ef68d2893 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
+#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
@@ -862,6 +863,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
unsigned long k, vaddr;
+ unsigned long total_size = 0;
if (elf_ppnt->p_type != PT_LOAD)
continue;
@@ -909,25 +911,20 @@ static int load_elf_binary(struct linux_binprm *bprm)
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
-#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
- /* Memory randomization might have been switched off
- * in runtime via sysctl or explicit setting of
- * personality flags.
- * If that is the case, retain the original non-zero
- * load_bias value in order to establish proper
- * non-randomized mappings.
- */
+ load_bias = ELF_ET_DYN_BASE - vaddr;
if (current->flags & PF_RANDOMIZE)
- load_bias = 0;
- else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#else
- load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
-#endif
+ load_bias += arch_mmap_rnd();
+ load_bias = ELF_PAGESTART(load_bias);
+ total_size = total_mapping_size(elf_phdata,
+ loc->elf_ex.e_phnum);
+ if (!total_size) {
+ error = -EINVAL;
+ goto out_free_dentry;
+ }
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags, 0);
+ elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
@@ -1053,15 +1050,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p;
-#ifdef arch_randomize_brk
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
-#ifdef CONFIG_COMPAT_BRK
+#ifdef compat_brk_randomized
current->brk_randomized = 1;
#endif
}
-#endif
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 97aff2879cda..9dcb05409ba7 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -9,6 +9,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -521,9 +522,8 @@ static int parse_command(const char __user *buffer, size_t count)
static void entry_status(Node *e, char *page)
{
- char *dp;
- char *status = "disabled";
- const char *flags = "flags: ";
+ char *dp = page;
+ const char *status = "disabled";
if (test_bit(Enabled, &e->flags))
status = "enabled";
@@ -533,12 +533,10 @@ static void entry_status(Node *e, char *page)
return;
}
- sprintf(page, "%s\ninterpreter %s\n", status, e->interpreter);
- dp = page + strlen(page);
+ dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter);
/* print the special flags */
- sprintf(dp, "%s", flags);
- dp += strlen(flags);
+ dp += sprintf(dp, "flags: ");
if (e->flags & MISC_FMT_PRESERVE_ARGV0)
*dp++ = 'P';
if (e->flags & MISC_FMT_OPEN_BINARY)
@@ -550,21 +548,11 @@ static void entry_status(Node *e, char *page)
if (!test_bit(Magic, &e->flags)) {
sprintf(dp, "extension .%s\n", e->magic);
} else {
- int i;
-
- sprintf(dp, "offset %i\nmagic ", e->offset);
- dp = page + strlen(page);
- for (i = 0; i < e->size; i++) {
- sprintf(dp, "%02x", 0xff & (int) (e->magic[i]));
- dp += 2;
- }
+ dp += sprintf(dp, "offset %i\nmagic ", e->offset);
+ dp = bin2hex(dp, e->magic, e->size);
if (e->mask) {
- sprintf(dp, "\nmask ");
- dp += 6;
- for (i = 0; i < e->size; i++) {
- sprintf(dp, "%02x", 0xff & (int) (e->mask[i]));
- dp += 2;
- }
+ dp += sprintf(dp, "\nmask ");
+ dp = bin2hex(dp, e->mask, e->size);
}
*dp++ = '\n';
*dp = '\0';
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266be67d3..897ee0503932 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
-#include <linux/aio.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -147,15 +146,13 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
}
static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
+blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
- offset, blkdev_get_block,
- NULL, NULL, 0);
+ return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
+ blkdev_get_block, NULL, NULL, 0);
}
int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1598,9 +1595,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct inode *bd_inode = file->f_mapping->host;
+ loff_t size = i_size_read(bd_inode);
struct blk_plug plug;
ssize_t ret;
+ if (bdev_read_only(I_BDEV(bd_inode)))
+ return -EPERM;
+
+ if (!iov_iter_count(from))
+ return 0;
+
+ if (iocb->ki_pos >= size)
+ return -ENOSPC;
+
+ iov_iter_truncate(from, size - iocb->ki_pos);
+
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
if (ret > 0) {
@@ -1660,8 +1670,6 @@ const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
.mmap = generic_file_mmap,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 993642199326..6d67f32e648d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
parent_nritems = btrfs_header_nritems(parent);
blocksize = root->nodesize;
- end_slot = parent_nritems;
+ end_slot = parent_nritems - 1;
- if (parent_nritems == 1)
+ if (parent_nritems <= 1)
return 0;
btrfs_set_lock_blocking(parent);
- for (i = start_slot; i < end_slot; i++) {
+ for (i = start_slot; i <= end_slot; i++) {
int close = 1;
btrfs_node_key(parent, &disk_key, i);
@@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
other = btrfs_node_blockptr(parent, i - 1);
close = close_blocks(blocknr, other, blocksize);
}
- if (!close && i < end_slot - 2) {
+ if (!close && i < end_slot) {
other = btrfs_node_blockptr(parent, i + 1);
close = close_blocks(blocknr, other, blocksize);
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84c3b00f3de8..f9c89cae39ee 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
loff_t actual_len, u64 *alloc_hint);
int btrfs_inode_check_errors(struct inode *inode);
extern const struct dentry_operations btrfs_dentry_operations;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode);
+#endif
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f79f38542a73..639f2663ed3f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
}
if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+ sizeof(struct btrfs_chunk)) {
- printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
+ printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
btrfs_super_sys_array_size(sb),
sizeof(struct btrfs_disk_key)
+ sizeof(struct btrfs_chunk));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 571f402d3fc4..8b353ad02f03 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
return 0;
}
+ if (trans->aborted)
+ return 0;
again:
inode = lookup_free_space_inode(root, block_group, path);
if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3243,6 +3245,20 @@ again:
*/
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
+ if (ret) {
+ /*
+ * So theoretically we could recover from this, simply set the
+ * super cache generation to 0 so we know to invalidate the
+ * cache, but then we'd have to keep track of the block groups
+ * that fail this way so we know we _have_ to reset this cache
+ * before the next commit or risk reading stale cache. So to
+ * limit our exposure to horrible edge cases lets just abort the
+ * transaction, this only happens in really bad situations
+ * anyway.
+ */
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_put;
+ }
WARN_ON(ret);
if (i_size_read(inode) > 0) {
@@ -3309,6 +3325,32 @@ out:
return ret;
}
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_block_group_cache *cache, *tmp;
+ struct btrfs_transaction *cur_trans = trans->transaction;
+ struct btrfs_path *path;
+
+ if (list_empty(&cur_trans->dirty_bgs) ||
+ !btrfs_test_opt(root, SPACE_CACHE))
+ return 0;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /* Could add new block groups, use _safe just in case */
+ list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
+ dirty_list) {
+ if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+ cache_save_setup(cache, trans, path);
+ }
+
+ btrfs_free_path(path);
+ return 0;
+}
+
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -5094,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
num_bytes = ALIGN(num_bytes, root->sectorsize);
spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents++;
+ nr_extents = (unsigned)div64_u64(num_bytes +
+ BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+ BTRFS_I(inode)->outstanding_extents += nr_extents;
+ nr_extents = 0;
if (BTRFS_I(inode)->outstanding_extents >
BTRFS_I(inode)->reserved_extents)
@@ -5239,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
if (dropped > 0)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
+ if (btrfs_test_is_dummy_root(root))
+ return;
+
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_free, 0);
if (root->fs_info->quota_enabled) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7233ff1d533..d688cfe5d496 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
/* Should be safe to release our pages at this point */
btrfs_release_extent_buffer_page(eb);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
+ __free_extent_buffer(eb);
+ return 1;
+ }
+#endif
call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
return 1;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b78bbbac900d..faa7d390841b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
-#include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
@@ -32,6 +31,7 @@
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
+#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -1739,27 +1739,19 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
u64 start_pos;
u64 end_pos;
ssize_t num_written = 0;
- ssize_t err = 0;
- size_t count = iov_iter_count(from);
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
- loff_t pos = iocb->ki_pos;
+ ssize_t err;
+ loff_t pos;
+ size_t count;
mutex_lock(&inode->i_mutex);
-
- current->backing_dev_info = inode_to_bdi(inode);
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- if (count == 0) {
+ err = generic_write_checks(iocb, from);
+ if (err <= 0) {
mutex_unlock(&inode->i_mutex);
- goto out;
+ return err;
}
- iov_iter_truncate(from, count);
-
+ current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_suid(file);
if (err) {
mutex_unlock(&inode->i_mutex);
@@ -1786,6 +1778,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/
update_time_for_write(inode);
+ pos = iocb->ki_pos;
+ count = iov_iter_count(from);
start_pos = round_down(pos, root->sectorsize);
if (start_pos > i_size_read(inode)) {
/* Expand hole size to cover write data, preventing empty gap */
@@ -1800,7 +1794,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
if (sync)
atomic_inc(&BTRFS_I(inode)->sync_writers);
- if (file->f_flags & O_DIRECT) {
+ if (iocb->ki_flags & IOCB_DIRECT) {
num_written = __btrfs_direct_write(iocb, from, pos);
} else {
num_written = __btrfs_buffered_write(file, from, pos);
@@ -1811,22 +1805,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
mutex_unlock(&inode->i_mutex);
/*
- * we want to make sure fsync finds this change
- * but we haven't joined a transaction running right now.
- *
- * Later on, someone is sure to update the inode and get the
- * real transid recorded.
- *
- * We set last_trans now to the fs_info generation + 1,
- * this will either be one more than the running transaction
- * or the generation used for the next transaction if there isn't
- * one running right now.
- *
* We also have to set last_sub_trans to the current log transid,
* otherwise subsequent syncs to a file that's been synced in this
* transaction will appear to have already occured.
*/
- BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
BTRFS_I(inode)->last_sub_trans = root->log_transid;
if (num_written > 0) {
err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1941,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
/*
- * check the transaction that last modified this inode
- * and see if its already been committed
- */
- if (!BTRFS_I(inode)->last_trans) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- /*
- * if the last transaction that changed this file was before
- * the current transaction, we can bail out now without any
- * syncing
+ * If the last transaction that changed this file was before the current
+ * transaction and we have the full sync flag set in our inode, we can
+ * bail out now without any syncing.
+ *
+ * Note that we can't bail out if the full sync flag isn't set. This is
+ * because when the full sync flag is set we start all ordered extents
+ * and wait for them to fully complete - when they complete they update
+ * the inode's last_trans field through:
+ *
+ * btrfs_finish_ordered_io() ->
+ * btrfs_update_inode_fallback() ->
+ * btrfs_update_inode() ->
+ * btrfs_set_inode_last_trans()
+ *
+ * So we are sure that last_trans is up to date and can do this check to
+ * bail out safely. For the fast path, when the full sync flag is not
+ * set in our inode, we can not do it because we start only our ordered
+ * extents and don't wait for them to complete (that is when
+ * btrfs_finish_ordered_io runs), so here at this point their last_trans
+ * value might be less than or equals to fs_info->last_trans_committed,
+ * and setting a speculative last_trans for an inode when a buffered
+ * write is made (such as fs_info->generation + 1 for example) would not
+ * be reliable since after setting the value and before fsync is called
+ * any number of transactions can start and commit (transaction kthread
+ * commits the current transaction periodically), and a transaction
+ * commit does not start nor waits for ordered extents to complete.
*/
smp_mb();
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
- BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed) {
- BTRFS_I(inode)->last_trans = 0;
-
+ (full_sync && BTRFS_I(inode)->last_trans <=
+ root->fs_info->last_trans_committed)) {
/*
* We'v had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
@@ -2275,6 +2269,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
bool same_page;
bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
u64 ino_size;
+ bool truncated_page = false;
+ bool updated_inode = false;
ret = btrfs_wait_ordered_range(inode, offset, len);
if (ret)
@@ -2306,13 +2302,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
* entire page.
*/
if (same_page && len < PAGE_CACHE_SIZE) {
- if (offset < ino_size)
+ if (offset < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode, offset, len, 0);
+ } else {
+ ret = 0;
+ }
goto out_only_mutex;
}
/* zero back part of the first page */
if (offset < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode, offset, 0, 0);
if (ret) {
mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2349,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (!ret) {
/* zero the front end of the last page */
if (tail_start + tail_len < ino_size) {
+ truncated_page = true;
ret = btrfs_truncate_page(inode,
tail_start + tail_len, 0, 1);
if (ret)
@@ -2357,8 +2359,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
if (lockend < lockstart) {
- mutex_unlock(&inode->i_mutex);
- return 0;
+ ret = 0;
+ goto out_only_mutex;
}
while (1) {
@@ -2506,6 +2508,7 @@ out_trans:
trans->block_rsv = &root->fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
+ updated_inode = true;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
out_free:
@@ -2515,6 +2518,22 @@ out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
out_only_mutex:
+ if (!updated_inode && truncated_page && !ret && !err) {
+ /*
+ * If we only end up zeroing part of a page, we still need to
+ * update the inode item, so that all the time fields are
+ * updated as well as the necessary btrfs inode in memory fields
+ * for detecting, at fsync time, if the inode isn't yet in the
+ * log tree or it's there but not up to date.
+ */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ } else {
+ err = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_end_transaction(trans, root);
+ }
+ }
mutex_unlock(&inode->i_mutex);
if (ret && !err)
err = ret;
@@ -2781,8 +2800,6 @@ out:
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a85c23dfcddb..43192e10cc43 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
-#include <linux/aio.h>
#include <linux/bit_spinlock.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
@@ -43,6 +42,7 @@
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
static int btrfs_dirty_inode(struct inode *inode);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode)
+{
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+}
+#endif
+
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
u64 new_size;
/*
- * We need the largest size of the remaining extent to see if we
- * need to add a new outstanding extent. Think of the following
- * case
- *
- * [MEAX_EXTENT_SIZEx2 - 4k][4k]
- *
- * The new_size would just be 4k and we'd think we had enough
- * outstanding extents for this if we only took one side of the
- * split, same goes for the other direction. We need to see if
- * the larger size still is the same amount of extents as the
- * original size, because if it is we need to add a new
- * outstanding extent. But if we split up and the larger size
- * is less than the original then we are good to go since we've
- * already accounted for the extra extent in our original
- * accounting.
+ * See the explanation in btrfs_merge_extent_hook, the same
+ * applies here, just in reverse.
*/
new_size = orig->end - split + 1;
- if ((split - orig->start) > new_size)
- new_size = split - orig->start;
-
- num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+ num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
- if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE) < num_extents)
+ new_size = split - orig->start;
+ num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+ if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE) >= num_extents)
return;
}
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
if (!(other->state & EXTENT_DELALLOC))
return;
- old_size = other->end - other->start + 1;
- new_size = old_size + (new->end - new->start + 1);
+ if (new->start > other->start)
+ new_size = new->end - other->start + 1;
+ else
+ new_size = other->end - new->start + 1;
/* we're not bigger than the max, unreserve the space and go */
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
}
/*
- * If we grew by another max_extent, just return, we want to keep that
- * reserved amount.
+ * We have to add up either side to figure out how many extents were
+ * accounted for before we merged into one big extent. If the number of
+ * extents we accounted for is <= the amount we need for the new range
+ * then we can return, otherwise drop. Think of it like this
+ *
+ * [ 4k][MAX_SIZE]
+ *
+ * So we've grown the extent by a MAX_SIZE extent, this would mean we
+ * need 2 outstanding extents, on one side we have 1 and the other side
+ * we have 1 so they are == and we can return. But in this case
+ *
+ * [MAX_SIZE+4k][MAX_SIZE+4k]
+ *
+ * Each range on their own accounts for 2 extents, but merged together
+ * they are only 3 extents worth of accounting, so we need to drop in
+ * this case.
*/
+ old_size = other->end - other->start + 1;
num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
+ old_size = new->end - new->start + 1;
+ num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
- BTRFS_MAX_EXTENT_SIZE) > num_extents)
+ BTRFS_MAX_EXTENT_SIZE) >= num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
}
+ /* For sanity tests */
+ if (btrfs_test_is_dummy_root(root))
+ return;
+
__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
root->fs_info->delalloc_batch);
spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len);
+ /* For sanity tests. */
+ if (btrfs_test_is_dummy_root(root))
+ return;
+
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
&& do_list && !(state->state & EXTENT_NORESERVE))
btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
- u64 orig_len = len;
+ u64 *outstanding_extents = NULL;
int unlock_bits = EXTENT_LOCKED;
int ret = 0;
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
lockstart = start;
lockend = start + len - 1;
+ if (current->journal_info) {
+ /*
+ * Need to pull our outstanding extents and set journal_info to NULL so
+ * that anything that needs to check if there's a transction doesn't get
+ * confused.
+ */
+ outstanding_extents = current->journal_info;
+ current->journal_info = NULL;
+ }
+
/*
* If this errors out it's because we couldn't invalidate pagecache for
* this range and we need to fallback to buffered.
@@ -7285,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
em->block_start != EXTENT_MAP_HOLE)) {
int type;
- int ret;
u64 block_start, orig_start, orig_block_len, ram_bytes;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -7349,11 +7381,20 @@ unlock:
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
- if (len < orig_len) {
+ /*
+ * If we have an outstanding_extents count still set then we're
+ * within our reservation, otherwise we need to adjust our inode
+ * counter appropriately.
+ */
+ if (*outstanding_extents) {
+ (*outstanding_extents)--;
+ } else {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
+
+ current->journal_info = outstanding_extents;
btrfs_free_reserved_data_space(inode, len);
}
@@ -7377,6 +7418,8 @@ unlock:
unlock_err:
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+ if (outstanding_extents)
+ current->journal_info = outstanding_extents;
return ret;
}
@@ -8038,7 +8081,7 @@ free_ordered:
bio_endio(dio_bio, ret);
}
-static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
+static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
const struct iov_iter *iter, loff_t offset)
{
int seg;
@@ -8053,7 +8096,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
goto out;
/* If this is a write we don't need to check anymore */
- if (rw & WRITE)
+ if (iov_iter_rw(iter) == WRITE)
return 0;
/*
* Check to make sure we don't have duplicate iov_base's in this
@@ -8071,18 +8114,19 @@ out:
return retval;
}
-static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ u64 outstanding_extents = 0;
size_t count = 0;
int flags = 0;
bool wakeup = true;
bool relock = false;
ssize_t ret;
- if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))
+ if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset))
return 0;
atomic_inc(&inode->i_dio_count);
@@ -8100,7 +8144,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
filemap_fdatawrite_range(inode->i_mapping, offset,
offset + count - 1);
- if (rw & WRITE) {
+ if (iov_iter_rw(iter) == WRITE) {
/*
* If the write DIO is beyond the EOF, we need update
* the isize, but it is protected by i_mutex. So we can
@@ -8113,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
ret = btrfs_delalloc_reserve_space(inode, count);
if (ret)
goto out;
+ outstanding_extents = div64_u64(count +
+ BTRFS_MAX_EXTENT_SIZE - 1,
+ BTRFS_MAX_EXTENT_SIZE);
+
+ /*
+ * We need to know how many extents we reserved so that we can
+ * do the accounting properly if we go over the number we
+ * originally calculated. Abuse current->journal_info for this.
+ */
+ current->journal_info = &outstanding_extents;
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) {
inode_dio_done(inode);
@@ -8120,11 +8174,12 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
wakeup = false;
}
- ret = __blockdev_direct_IO(rw, iocb, inode,
- BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
- iter, offset, btrfs_get_blocks_direct, NULL,
- btrfs_submit_direct, flags);
- if (rw & WRITE) {
+ ret = __blockdev_direct_IO(iocb, inode,
+ BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
+ iter, offset, btrfs_get_blocks_direct, NULL,
+ btrfs_submit_direct, flags);
+ if (iov_iter_rw(iter) == WRITE) {
+ current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED)
btrfs_delalloc_release_space(inode, count);
else if (ret >= 0 && (size_t)ret < count)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 534544e08f76..157cc54fc634 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
continue;
if (entry_end(ordered) <= start)
break;
- if (!list_empty(&ordered->log_list))
- continue;
- if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
+ if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
continue;
list_add(&ordered->log_list, logged_list);
atomic_inc(&ordered->refs);
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
&ordered->flags));
- if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
- list_add_tail(&ordered->trans_list, &trans->ordered);
+ list_add_tail(&ordered->trans_list, &trans->ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 97159a8e91d4..058c79eecbfb 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
if (oper1->seq < oper2->seq)
return -1;
if (oper1->seq > oper2->seq)
- return -1;
+ return 1;
if (oper1->ref_root < oper2->ref_root)
return -1;
if (oper1->ref_root > oper2->ref_root)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fe5857223515..d6033f540cc7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
u64 parent_ino;
u64 ino;
u64 gen;
+ bool is_orphan;
struct list_head update_refs;
};
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
u64 ino_gen,
u64 parent_ino,
struct list_head *new_refs,
- struct list_head *deleted_refs)
+ struct list_head *deleted_refs,
+ const bool is_orphan)
{
struct rb_node **p = &sctx->pending_dir_moves.rb_node;
struct rb_node *parent = NULL;
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
pm->parent_ino = parent_ino;
pm->ino = ino;
pm->gen = ino_gen;
+ pm->is_orphan = is_orphan;
INIT_LIST_HEAD(&pm->list);
INIT_LIST_HEAD(&pm->update_refs);
RB_CLEAR_NODE(&pm->node);
@@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
rmdir_ino = dm->rmdir_ino;
free_waiting_dir_move(sctx, dm);
- ret = get_first_ref(sctx->parent_root, pm->ino,
- &parent_ino, &parent_gen, name);
- if (ret < 0)
- goto out;
-
- ret = get_cur_path(sctx, parent_ino, parent_gen,
- from_path);
- if (ret < 0)
- goto out;
- ret = fs_path_add_path(from_path, name);
+ if (pm->is_orphan) {
+ ret = gen_unique_name(sctx, pm->ino,
+ pm->gen, from_path);
+ } else {
+ ret = get_first_ref(sctx->parent_root, pm->ino,
+ &parent_ino, &parent_gen, name);
+ if (ret < 0)
+ goto out;
+ ret = get_cur_path(sctx, parent_ino, parent_gen,
+ from_path);
+ if (ret < 0)
+ goto out;
+ ret = fs_path_add_path(from_path, name);
+ }
if (ret < 0)
goto out;
@@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
LIST_HEAD(deleted_refs);
ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
- &pm->update_refs, &deleted_refs);
+ &pm->update_refs, &deleted_refs,
+ pm->is_orphan);
if (ret < 0)
goto out;
if (rmdir_ino) {
@@ -3283,6 +3291,127 @@ out:
return ret;
}
+/*
+ * We might need to delay a directory rename even when no ancestor directory
+ * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
+ * renamed. This happens when we rename a directory to the old name (the name
+ * in the parent root) of some other unrelated directory that got its rename
+ * delayed due to some ancestor with higher number that got renamed.
+ *
+ * Example:
+ *
+ * Parent snapshot:
+ * . (ino 256)
+ * |---- a/ (ino 257)
+ * | |---- file (ino 260)
+ * |
+ * |---- b/ (ino 258)
+ * |---- c/ (ino 259)
+ *
+ * Send snapshot:
+ * . (ino 256)
+ * |---- a/ (ino 258)
+ * |---- x/ (ino 259)
+ * |---- y/ (ino 257)
+ * |----- file (ino 260)
+ *
+ * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
+ * from 'a' to 'x/y' happening first, which in turn depends on the rename of
+ * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
+ * must issue is:
+ *
+ * 1 - rename 259 from 'c' to 'x'
+ * 2 - rename 257 from 'a' to 'x/y'
+ * 3 - rename 258 from 'b' to 'a'
+ *
+ * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
+ * be done right away and < 0 on error.
+ */
+static int wait_for_dest_dir_move(struct send_ctx *sctx,
+ struct recorded_ref *parent_ref,
+ const bool is_orphan)
+{
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct btrfs_key di_key;
+ struct btrfs_dir_item *di;
+ u64 left_gen;
+ u64 right_gen;
+ int ret = 0;
+
+ if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
+ return 0;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = parent_ref->dir;
+ key.type = BTRFS_DIR_ITEM_KEY;
+ key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
+
+ ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+
+ di = btrfs_match_dir_item_name(sctx->parent_root, path,
+ parent_ref->name, parent_ref->name_len);
+ if (!di) {
+ ret = 0;
+ goto out;
+ }
+ /*
+ * di_key.objectid has the number of the inode that has a dentry in the
+ * parent directory with the same name that sctx->cur_ino is being
+ * renamed to. We need to check if that inode is in the send root as
+ * well and if it is currently marked as an inode with a pending rename,
+ * if it is, we need to delay the rename of sctx->cur_ino as well, so
+ * that it happens after that other inode is renamed.
+ */
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
+ if (di_key.type != BTRFS_INODE_ITEM_KEY) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
+ &left_gen, NULL, NULL, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
+ &right_gen, NULL, NULL, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ ret = 0;
+ goto out;
+ }
+
+ /* Different inode, no need to delay the rename of sctx->cur_ino */
+ if (right_gen != left_gen) {
+ ret = 0;
+ goto out;
+ }
+
+ if (is_waiting_for_move(sctx, di_key.objectid)) {
+ ret = add_pending_dir_move(sctx,
+ sctx->cur_ino,
+ sctx->cur_inode_gen,
+ di_key.objectid,
+ &sctx->new_refs,
+ &sctx->deleted_refs,
+ is_orphan);
+ if (!ret)
+ ret = 1;
+ }
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
static int wait_for_parent_move(struct send_ctx *sctx,
struct recorded_ref *parent_ref)
{
@@ -3349,7 +3478,8 @@ out:
sctx->cur_inode_gen,
ino,
&sctx->new_refs,
- &sctx->deleted_refs);
+ &sctx->deleted_refs,
+ false);
if (!ret)
ret = 1;
}
@@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
int did_overwrite = 0;
int is_orphan = 0;
u64 last_dir_ino_rm = 0;
+ bool can_rename = true;
verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
@@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
}
}
+ if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
+ ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
+ if (ret < 0)
+ goto out;
+ if (ret == 1) {
+ can_rename = false;
+ *pending_move = 1;
+ }
+ }
+
/*
* link/move the ref to the new place. If we have an orphan
* inode, move it and update valid_path. If not, link or move
* it depending on the inode mode.
*/
- if (is_orphan) {
+ if (is_orphan && can_rename) {
ret = send_rename(sctx, valid_path, cur->full_path);
if (ret < 0)
goto out;
@@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
ret = fs_path_copy(valid_path, cur->full_path);
if (ret < 0)
goto out;
- } else {
+ } else if (can_rename) {
if (S_ISDIR(sctx->cur_inode_mode)) {
/*
* Dirs can't be linked, so move it. For moved
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index a116b55ce788..054fc0d97131 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -911,6 +911,197 @@ out:
return ret;
}
+static int test_extent_accounting(void)
+{
+ struct inode *inode = NULL;
+ struct btrfs_root *root = NULL;
+ int ret = -ENOMEM;
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_msg("Couldn't allocate inode\n");
+ return ret;
+ }
+
+ root = btrfs_alloc_dummy_root();
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
+ goto out;
+ }
+
+ root->fs_info = btrfs_alloc_dummy_fs_info();
+ if (!root->fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ goto out;
+ }
+
+ BTRFS_I(inode)->root = root;
+ btrfs_test_inode_set_ops(inode);
+
+ /* [BTRFS_MAX_EXTENT_SIZE] */
+ BTRFS_I(inode)->outstanding_extents++;
+ ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
+ NULL);
+ if (ret) {
+ test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents != 1) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 1, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+
+ /* [BTRFS_MAX_EXTENT_SIZE][4k] */
+ BTRFS_I(inode)->outstanding_extents++;
+ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
+ BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
+ if (ret) {
+ test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents != 2) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 2, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+
+ /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
+ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ BTRFS_MAX_EXTENT_SIZE >> 1,
+ (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+ EXTENT_DELALLOC | EXTENT_DIRTY |
+ EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
+ NULL, GFP_NOFS);
+ if (ret) {
+ test_msg("clear_extent_bit returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents != 2) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 2, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+
+ /* [BTRFS_MAX_EXTENT_SIZE][4K] */
+ BTRFS_I(inode)->outstanding_extents++;
+ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
+ (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+ NULL);
+ if (ret) {
+ test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents != 2) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 2, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+
+ /*
+ * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
+ *
+ * I'm artificially adding 2 to outstanding_extents because in the
+ * buffered IO case we'd add things up as we go, but I don't feel like
+ * doing that here, this isn't the interesting case we want to test.
+ */
+ BTRFS_I(inode)->outstanding_extents += 2;
+ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
+ (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
+ NULL);
+ if (ret) {
+ test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents != 4) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 4, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+
+ /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
+ BTRFS_I(inode)->outstanding_extents++;
+ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
+ BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+ if (ret) {
+ test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents != 3) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 3, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+
+ /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
+ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
+ BTRFS_MAX_EXTENT_SIZE+4096,
+ BTRFS_MAX_EXTENT_SIZE+8191,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+ NULL, GFP_NOFS);
+ if (ret) {
+ test_msg("clear_extent_bit returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents != 4) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 4, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+
+ /*
+ * Refill the hole again just for good measure, because I thought it
+ * might fail and I'd rather satisfy my paranoia at this point.
+ */
+ BTRFS_I(inode)->outstanding_extents++;
+ ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
+ BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+ if (ret) {
+ test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents != 3) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 3, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+
+ /* Empty */
+ ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+ NULL, GFP_NOFS);
+ if (ret) {
+ test_msg("clear_extent_bit returned %d\n", ret);
+ goto out;
+ }
+ if (BTRFS_I(inode)->outstanding_extents) {
+ ret = -EINVAL;
+ test_msg("Miscount, wanted 0, got %u\n",
+ BTRFS_I(inode)->outstanding_extents);
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret)
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+ NULL, GFP_NOFS);
+ iput(inode);
+ btrfs_free_dummy_root(root);
+ return ret;
+}
+
int btrfs_test_inodes(void)
{
int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
if (ret)
return ret;
test_msg("Running hole first btrfs_get_extent test\n");
- return test_hole_first();
+ ret = test_hole_first();
+ if (ret)
+ return ret;
+ test_msg("Running outstanding_extents tests\n");
+ return test_extent_accounting();
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7e80f32550a6..8be4278e25e8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
u64 old_root_bytenr;
u64 old_root_used;
struct btrfs_root *tree_root = root->fs_info->tree_root;
- bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
old_root_used = btrfs_root_used(&root->root_item);
- btrfs_write_dirty_block_groups(trans, root);
while (1) {
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
if (old_root_bytenr == root->node->start &&
- old_root_used == btrfs_root_used(&root->root_item) &&
- (!extent_root ||
- list_empty(&trans->transaction->dirty_bgs)))
+ old_root_used == btrfs_root_used(&root->root_item))
break;
btrfs_set_root_node(&root->root_item, root->node);
@@ -1044,17 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
return ret;
old_root_used = btrfs_root_used(&root->root_item);
- if (extent_root) {
- ret = btrfs_write_dirty_block_groups(trans, root);
- if (ret)
- return ret;
- }
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret)
- return ret;
- ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- if (ret)
- return ret;
}
return 0;
@@ -1071,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
struct list_head *next;
struct extent_buffer *eb;
int ret;
@@ -1098,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
if (ret)
return ret;
+ ret = btrfs_setup_space_cache(trans, root);
+ if (ret)
+ return ret;
+
/* run_qgroups might have added some more refs */
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
if (ret)
return ret;
-
+again:
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
@@ -1115,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
ret = update_cowonly_root(trans, root);
if (ret)
return ret;
+ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+ if (ret)
+ return ret;
}
+ while (!list_empty(dirty_bgs)) {
+ ret = btrfs_write_dirty_block_groups(trans, root);
+ if (ret)
+ return ret;
+ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+ if (ret)
+ return ret;
+ }
+
+ if (!list_empty(&fs_info->dirty_cowonly_roots))
+ goto again;
+
list_add_tail(&fs_info->extent_root->dirty_list,
&trans->transaction->switch_commits);
btrfs_after_dev_replace_commit(fs_info);
@@ -1814,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_for_commit(root, cur_trans);
+ if (unlikely(cur_trans->aborted))
+ ret = cur_trans->aborted;
+
btrfs_put_transaction(cur_trans);
return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9a37f8b39bae..c5b8ba37f88e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1012,7 +1012,7 @@ again:
base = btrfs_item_ptr_offset(leaf, path->slots[0]);
while (cur_offset < item_size) {
- extref = (struct btrfs_inode_extref *)base + cur_offset;
+ extref = (struct btrfs_inode_extref *)(base + cur_offset);
victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cd4d1315aaa9..8222f6f74147 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4903,10 +4903,17 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
{
struct btrfs_bio *bbio = kzalloc(
+ /* the size of the btrfs_bio */
sizeof(struct btrfs_bio) +
+ /* plus the variable array for the stripes */
sizeof(struct btrfs_bio_stripe) * (total_stripes) +
+ /* plus the variable array for the tgt dev */
sizeof(int) * (real_stripes) +
- sizeof(u64) * (real_stripes),
+ /*
+ * plus the raid_map, which includes both the tgt dev
+ * and the stripes
+ */
+ sizeof(u64) * (total_stripes),
GFP_NOFS);
if (!bbio)
return NULL;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 47b19465f0dc..883b93623bc5 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
name, name_len, -1);
if (!di && (flags & XATTR_REPLACE))
ret = -ENODATA;
+ else if (IS_ERR(di))
+ ret = PTR_ERR(di);
else if (di)
ret = btrfs_delete_one_dir_name(trans, root, path, di);
goto out;
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
ASSERT(mutex_is_locked(&inode->i_mutex));
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
name, name_len, 0);
- if (!di) {
+ if (!di)
ret = -ENODATA;
+ else if (IS_ERR(di))
+ ret = PTR_ERR(di);
+ if (ret)
goto out;
- }
btrfs_release_path(path);
di = NULL;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 20805db2c987..c7a5602d01ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3243,8 +3243,8 @@ int try_to_free_buffers(struct page *page)
* to synchronise against __set_page_dirty_buffers and prevent the
* dirty bit from being lost.
*/
- if (ret)
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ if (ret && TestClearPageDirty(page))
+ account_page_cleaned(page, mapping);
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index fd5599d32362..e162bcd105ee 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
inode, page, (int)pos, (int)len);
r = ceph_update_writeable_page(file, pos, len, page);
+ if (r < 0)
+ page_cache_release(page);
+ else
+ *pagep = page;
} while (r == -EAGAIN);
return r;
@@ -1198,8 +1202,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
* intercept O_DIRECT reads and writes early, this function should
* never get called.
*/
-static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
- struct iov_iter *iter,
+static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter,
loff_t pos)
{
WARN_ON(1);
@@ -1535,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
- err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
- "inline_version", &inline_version,
- sizeof(inline_version),
- CEPH_OSD_CMPXATTR_OP_GT,
- CEPH_OSD_CMPXATTR_MODE_U64);
- if (err)
- goto out_put;
-
- err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
- "inline_version", &inline_version,
- sizeof(inline_version), 0, 0);
- if (err)
- goto out_put;
+ {
+ __le64 xattr_buf = cpu_to_le64(inline_version);
+ err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
+ "inline_version", &xattr_buf,
+ sizeof(xattr_buf),
+ CEPH_OSD_CMPXATTR_OP_GT,
+ CEPH_OSD_CMPXATTR_MODE_U64);
+ if (err)
+ goto out_put;
+ }
+
+ {
+ char xattr_buf[32];
+ int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
+ "%llu", inline_version);
+ err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
+ "inline_version",
+ xattr_buf, xattr_len, 0, 0);
+ if (err)
+ goto out_put;
+ }
ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8172775428a0..11631c4c7d14 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
return ret;
}
+static void drop_inode_snap_realm(struct ceph_inode_info *ci)
+{
+ struct ceph_snap_realm *realm = ci->i_snap_realm;
+ spin_lock(&realm->inodes_with_caps_lock);
+ list_del_init(&ci->i_snap_realm_item);
+ ci->i_snap_realm_counter++;
+ ci->i_snap_realm = NULL;
+ spin_unlock(&realm->inodes_with_caps_lock);
+ ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
+ realm);
+}
+
/*
* Remove a cap. Take steps to deal with a racing iterate_session_caps.
*
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
if (removed)
ceph_put_cap(mdsc, cap);
- if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
- struct ceph_snap_realm *realm = ci->i_snap_realm;
- spin_lock(&realm->inodes_with_caps_lock);
- list_del_init(&ci->i_snap_realm_item);
- ci->i_snap_realm_counter++;
- ci->i_snap_realm = NULL;
- spin_unlock(&realm->inodes_with_caps_lock);
- ceph_put_snap_realm(mdsc, realm);
- }
+ /* when reconnect denied, we remove session caps forcibly,
+ * i_wr_ref can be non-zero. If there are ongoing write,
+ * keep i_snap_realm.
+ */
+ if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
+ drop_inode_snap_realm(ci);
+
if (!__ceph_is_any_real_caps(ci))
__cap_delay_cancel(mdsc, ci);
}
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
int was = ci->i_dirty_caps;
int dirty = 0;
+ if (!ci->i_auth_cap) {
+ pr_warn("__mark_dirty_caps %p %llx mask %s, "
+ "but no auth cap (session was closed?)\n",
+ inode, ceph_ino(inode), ceph_cap_string(mask));
+ return 0;
+ }
+
dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
ceph_cap_string(mask), ceph_cap_string(was),
ceph_cap_string(was | mask));
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
ci->i_snap_realm->cached_context);
dout(" inode %p now dirty snapc %p auth cap %p\n",
&ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
- WARN_ON(!ci->i_auth_cap);
BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock);
list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1545,7 +1561,19 @@ retry_locked:
if (!mdsc->stopping && inode->i_nlink > 0) {
if (want) {
retain |= CEPH_CAP_ANY; /* be greedy */
+ } else if (S_ISDIR(inode->i_mode) &&
+ (issued & CEPH_CAP_FILE_SHARED) &&
+ __ceph_dir_is_complete(ci)) {
+ /*
+ * If a directory is complete, we want to keep
+ * the exclusive cap. So that MDS does not end up
+ * revoking the shared cap on every create/unlink
+ * operation.
+ */
+ want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+ retain |= want;
} else {
+
retain |= CEPH_CAP_ANY_SHARED;
/*
* keep RD only if we didn't have the file open RW,
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
wake = 1;
}
}
+ /* see comment in __ceph_remove_cap() */
+ if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
+ drop_inode_snap_realm(ci);
}
spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 83e9976f7189..e729b79812b4 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* can we use the dcache? */
spin_lock(&ci->i_ceph_lock);
if ((ctx->pos == 2 || fi->dentry) &&
+ ceph_test_mount_opt(fsc, DCACHE) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
__ceph_dir_is_complete_ordered(ci) &&
@@ -336,16 +337,23 @@ more:
ceph_mdsc_put_request(req);
return err;
}
- req->r_inode = inode;
- ihold(inode);
- req->r_dentry = dget(file->f_path.dentry);
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
req->r_direct_is_hash = true;
- req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+ if (fi->last_name) {
+ req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+ if (!req->r_path2) {
+ ceph_mdsc_put_request(req);
+ return -ENOMEM;
+ }
+ }
req->r_readdir_offset = fi->next_offset;
req->r_args.readdir.frag = cpu_to_le32(frag);
+
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_dentry = dget(file->f_path.dentry);
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0) {
ceph_mdsc_put_request(req);
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
fsc->mount_options->snapdir_name,
dentry->d_name.len) &&
!is_root_ceph_dentry(dir, dentry) &&
+ ceph_test_mount_opt(fsc, DCACHE) &&
__ceph_dir_is_complete(ci) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
spin_unlock(&ci->i_ceph_lock);
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(req);
goto out;
}
- req->r_dentry = dget(dentry);
- req->r_num_caps = 2;
req->r_path2 = kstrdup(dest, GFP_NOFS);
+ if (!req->r_path2) {
+ err = -ENOMEM;
+ ceph_mdsc_put_request(req);
+ goto out;
+ }
req->r_locked_dir = dir;
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
err = ceph_mdsc_do_request(mdsc, dir, req);
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
+ int op = CEPH_MDS_OP_RENAME;
int err;
if (ceph_snap(old_dir) != ceph_snap(new_dir))
return -EXDEV;
- if (ceph_snap(old_dir) != CEPH_NOSNAP ||
- ceph_snap(new_dir) != CEPH_NOSNAP)
- return -EROFS;
+ if (ceph_snap(old_dir) != CEPH_NOSNAP) {
+ if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
+ op = CEPH_MDS_OP_RENAMESNAP;
+ else
+ return -EROFS;
+ }
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
- req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
+ req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
ihold(old_dir);
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
dout("dir_fsync %p wait on tid %llu (until %llu)\n",
inode, req->r_tid, last_tid);
if (req->r_timeout) {
- ret = wait_for_completion_timeout(
- &req->r_safe_completion, req->r_timeout);
- if (ret > 0)
+ unsigned long time_left = wait_for_completion_timeout(
+ &req->r_safe_completion,
+ req->r_timeout);
+ if (time_left > 0)
ret = 0;
- else if (ret == 0)
+ else
ret = -EIO; /* timed out */
} else {
wait_for_completion(&req->r_safe_completion);
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
.getattr = ceph_getattr,
.mkdir = ceph_mkdir,
.rmdir = ceph_unlink,
+ .rename = ceph_rename,
};
const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d533075a823d..b9b8eb225f66 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,7 +7,6 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
#include <linux/falloc.h>
#include "super.h"
@@ -458,7 +457,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
if (ret < 0)
return ret;
- if (file->f_flags & O_DIRECT) {
+ if (iocb->ki_flags & IOCB_DIRECT) {
while (iov_iter_count(i)) {
size_t start;
ssize_t n;
@@ -808,7 +807,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *filp = iocb->ki_filp;
struct ceph_file_info *fi = filp->private_data;
- size_t len = iocb->ki_nbytes;
+ size_t len = iov_iter_count(to);
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct page *pinned_page = NULL;
@@ -829,7 +828,7 @@ again:
return ret;
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
- (iocb->ki_filp->f_flags & O_DIRECT) ||
+ (iocb->ki_flags & IOCB_DIRECT) ||
(fi->flags & CEPH_F_SYNC)) {
dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
@@ -942,9 +941,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
- ssize_t count = iov_iter_count(from), written = 0;
+ ssize_t count, written = 0;
int err, want, got;
- loff_t pos = iocb->ki_pos;
+ loff_t pos;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
@@ -954,14 +953,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err)
- goto out;
-
- if (count == 0)
+ err = generic_write_checks(iocb, from);
+ if (err <= 0)
goto out;
- iov_iter_truncate(from, count);
+ pos = iocb->ki_pos;
+ count = iov_iter_count(from);
err = file_remove_suid(file);
if (err)
goto out;
@@ -998,12 +995,12 @@ retry_snap:
inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
- (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+ (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
struct iov_iter data;
mutex_unlock(&inode->i_mutex);
/* we might need to revert back to that point */
data = *from;
- if (file->f_flags & O_DIRECT)
+ if (iocb->ki_flags & IOCB_DIRECT)
written = ceph_sync_direct_write(iocb, &data, pos);
else
written = ceph_sync_write(iocb, &data, pos);
@@ -1332,8 +1329,6 @@ const struct file_operations ceph_file_fops = {
.open = ceph_open,
.release = ceph_release,
.llseek = ceph_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = ceph_read_iter,
.write_iter = ceph_write_iter,
.mmap = ceph_mmap,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 71c073f38e54..0a2eb32ffe43 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
spin_unlock(&session->s_cap_lock);
}
+static void cleanup_session_requests(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ struct ceph_mds_request *req;
+ struct rb_node *p;
+
+ dout("cleanup_session_requests mds%d\n", session->s_mds);
+ mutex_lock(&mdsc->mutex);
+ while (!list_empty(&session->s_unsafe)) {
+ req = list_first_entry(&session->s_unsafe,
+ struct ceph_mds_request, r_unsafe_item);
+ list_del_init(&req->r_unsafe_item);
+ pr_info(" dropping unsafe request %llu\n", req->r_tid);
+ __unregister_request(mdsc, req);
+ }
+ /* zero r_attempts, so kick_requests() will re-send requests */
+ p = rb_first(&mdsc->request_tree);
+ while (p) {
+ req = rb_entry(p, struct ceph_mds_request, r_node);
+ p = rb_next(p);
+ if (req->r_session &&
+ req->r_session->s_mds == session->s_mds)
+ req->r_attempts = 0;
+ }
+ mutex_unlock(&mdsc->mutex);
+}
+
/*
* Helper to safely iterate over all caps associated with a session, with
* special care taken to handle a racing __ceph_remove_cap().
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
cap, ci, &ci->vfs_inode);
spin_lock(&ci->i_ceph_lock);
__ceph_remove_cap(cap, false);
- if (!__ceph_is_any_real_caps(ci)) {
+ if (!ci->i_auth_cap) {
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
mdsc->num_cap_flushing--;
drop = 1;
}
- if (drop && ci->i_wrbuffer_ref) {
- pr_info(" dropping dirty data for %p %lld\n",
- inode, ceph_ino(inode));
- ci->i_wrbuffer_ref = 0;
- ci->i_wrbuffer_ref_head = 0;
- drop++;
- }
spin_unlock(&mdsc->cap_dirty_lock);
}
spin_unlock(&ci->i_ceph_lock);
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
*/
static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req,
- int mds)
+ int mds, bool drop_cap_releases)
{
struct ceph_msg *msg;
struct ceph_mds_request_head *head;
@@ -1937,6 +1957,12 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
releases += ceph_encode_inode_release(&p,
req->r_old_dentry->d_inode,
mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
+
+ if (drop_cap_releases) {
+ releases = 0;
+ p = msg->front.iov_base + req->r_request_release_offset;
+ }
+
head->num_releases = cpu_to_le16(releases);
/* time stamp */
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
*/
static int __prepare_send_request(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req,
- int mds)
+ int mds, bool drop_cap_releases)
{
struct ceph_mds_request_head *rhead;
struct ceph_msg *msg;
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
ceph_msg_put(req->r_request);
req->r_request = NULL;
}
- msg = create_request_message(mdsc, req, mds);
+ msg = create_request_message(mdsc, req, mds, drop_cap_releases);
if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg);
complete_request(mdsc, req);
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies;
- err = __prepare_send_request(mdsc, req, mds);
+ err = __prepare_send_request(mdsc, req, mds, false);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
case CEPH_SESSION_CLOSE:
if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
pr_info("mds%d reconnect denied\n", session->s_mds);
+ cleanup_session_requests(mdsc, session);
remove_session_caps(session);
wake = 2; /* for good measure */
wake_up_all(&mdsc->session_close_wq);
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
- err = __prepare_send_request(mdsc, req, session->s_mds);
+ err = __prepare_send_request(mdsc, req, session->s_mds, true);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
continue; /* only old requests */
if (req->r_session &&
req->r_session->s_mds == session->s_mds) {
- err = __prepare_send_request(mdsc, req, session->s_mds);
+ err = __prepare_send_request(mdsc, req,
+ session->s_mds, true);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
spin_unlock(&session->s_cap_lock);
/* trim unused caps to reduce MDS's cache rejoin time */
- shrink_dcache_parent(mdsc->fsc->sb->s_root);
+ if (mdsc->fsc->sb->s_root)
+ shrink_dcache_parent(mdsc->fsc->sb->s_root);
ceph_con_close(&session->s_con);
ceph_con_open(&session->s_con,
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
di->lease_renew_from &&
di->lease_renew_after == 0) {
unsigned long duration =
- le32_to_cpu(h->duration_ms) * HZ / 1000;
+ msecs_to_jiffies(le32_to_cpu(h->duration_ms));
di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c
index 51cc23e48111..89e6bc321df3 100644
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op)
case CEPH_MDS_OP_LSSNAP: return "lssnap";
case CEPH_MDS_OP_MKSNAP: return "mksnap";
case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+ case CEPH_MDS_OP_RENAMESNAP: return "renamesnap";
case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index a63997b8bcff..e463ebd69a9c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
fsopt->rsize = CEPH_RSIZE_DEFAULT;
fsopt->rasize = CEPH_RASIZE_DEFAULT;
fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+ if (!fsopt->snapdir_name) {
+ err = -ENOMEM;
+ goto out;
+ }
+
fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
struct ceph_mount_options *fsopt = fsc->mount_options;
- struct ceph_options *opt = fsc->client->options;
-
- if (opt->flags & CEPH_OPT_FSID)
- seq_printf(m, ",fsid=%pU", &opt->fsid);
- if (opt->flags & CEPH_OPT_NOSHARE)
- seq_puts(m, ",noshare");
- if (opt->flags & CEPH_OPT_NOCRC)
- seq_puts(m, ",nocrc");
- if (opt->flags & CEPH_OPT_NOMSGAUTH)
- seq_puts(m, ",nocephx_require_signatures");
- if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
- seq_puts(m, ",notcp_nodelay");
-
- if (opt->name)
- seq_printf(m, ",name=%s", opt->name);
- if (opt->key)
- seq_puts(m, ",secret=<hidden>");
-
- if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
- seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
- if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
- seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
- if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
- seq_printf(m, ",osdkeepalivetimeout=%d",
- opt->osd_keepalive_timeout);
+ size_t pos;
+ int ret;
+
+ /* a comma between MNT/MS and client options */
+ seq_putc(m, ',');
+ pos = m->count;
+
+ ret = ceph_print_client_options(m, fsc->client);
+ if (ret)
+ return ret;
+
+ /* retract our comma if no client options */
+ if (m->count == pos)
+ m->count--;
if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
seq_puts(m, ",dirstat");
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",norbytes");
if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
seq_puts(m, ",noasyncreaddir");
- if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
- seq_puts(m, ",dcache");
- else
+ if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
seq_puts(m, ",nodcache");
if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
seq_puts(m, ",fsc");
- else
- seq_puts(m, ",nofsc");
#ifdef CONFIG_CEPH_FS_POSIX_ACL
if (fsopt->sb_flags & MS_POSIXACL)
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+
return 0;
}
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
if (IS_ERR(req))
return ERR_CAST(req);
req->r_path1 = kstrdup(path, GFP_NOFS);
+ if (!req->r_path1) {
+ root = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
req->r_started = started;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 04c8124ed30e..fa20e1318939 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,7 +36,8 @@
#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
-#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
+#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \
+ CEPH_MOUNT_OPT_DCACHE)
#define ceph_set_mount_opt(fsc, opt) \
(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
/* file.c */
extern const struct file_operations ceph_file_fops;
-extern const struct address_space_operations ceph_aops;
extern int ceph_open(struct inode *inode, struct file *file);
extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5a492caf34cb..5c4c9c256931 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
err = PTR_ERR(req);
goto out;
}
- req->r_inode = inode;
- ihold(inode);
- req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
- req->r_num_caps = 1;
+
req->r_args.setxattr.flags = cpu_to_le32(flags);
req->r_path2 = kstrdup(name, GFP_NOFS);
+ if (!req->r_path2) {
+ ceph_mdsc_put_request(req);
+ err = -ENOMEM;
+ goto out;
+ }
req->r_pagelist = pagelist;
pagelist = NULL;
+ req->r_inode = inode;
+ ihold(inode);
+ req->r_num_caps = 1;
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+
dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
err = ceph_mdsc_do_request(mdsc, NULL, req);
ceph_mdsc_put_request(req);
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
+ req->r_path2 = kstrdup(name, GFP_NOFS);
+ if (!req->r_path2)
+ return -ENOMEM;
+
req->r_inode = inode;
ihold(inode);
- req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
req->r_num_caps = 1;
- req->r_path2 = kstrdup(name, GFP_NOFS);
-
+ req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
err = ceph_mdsc_do_request(mdsc, NULL, req);
ceph_mdsc_put_request(req);
return err;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ac7445e6ec7..aa0dc2573374 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -1,6 +1,9 @@
/*
* fs/cifs/cifsencrypt.c
*
+ * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP
+ * for more detailed information
+ *
* Copyright (C) International Business Machines Corp., 2005,2013
* Author(s): Steve French (sfrench@us.ibm.com)
*
@@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
__func__);
return rc;
}
- } else if (ses->serverName) {
+ } else {
+ /* We use ses->serverName if no domain name available */
len = strlen(ses->serverName);
server = kmalloc(2 + (len * 2), GFP_KERNEL);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d72fe37f5420..eaab4b2a0595 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -906,8 +906,6 @@ const struct inode_operations cifs_symlink_inode_ops = {
};
const struct file_operations cifs_file_ops = {
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
.open = cifs_open,
@@ -926,8 +924,6 @@ const struct file_operations cifs_file_ops = {
};
const struct file_operations cifs_file_strict_ops = {
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_strict_readv,
.write_iter = cifs_strict_writev,
.open = cifs_open,
@@ -947,8 +943,6 @@ const struct file_operations cifs_file_strict_ops = {
const struct file_operations cifs_file_direct_ops = {
/* BB reevaluate whether they can be done with directio, no cache */
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_user_readv,
.write_iter = cifs_user_writev,
.open = cifs_open,
@@ -967,8 +961,6 @@ const struct file_operations cifs_file_direct_ops = {
};
const struct file_operations cifs_file_nobrl_ops = {
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_loose_read_iter,
.write_iter = cifs_file_write_iter,
.open = cifs_open,
@@ -986,8 +978,6 @@ const struct file_operations cifs_file_nobrl_ops = {
};
const struct file_operations cifs_file_strict_nobrl_ops = {
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_strict_readv,
.write_iter = cifs_strict_writev,
.open = cifs_open,
@@ -1006,8 +996,6 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
const struct file_operations cifs_file_direct_nobrl_ops = {
/* BB reevaluate whether they can be done with directio, no cache */
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = cifs_user_readv,
.write_iter = cifs_user_writev,
.open = cifs_open,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d3aa999ab785..f3bfe08e177b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,8 +773,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
length = atomic_dec_return(&tcpSesAllocCount);
if (length > 0)
- mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
- GFP_KERNEL);
+ mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
}
static int
@@ -848,8 +847,7 @@ cifs_demultiplex_thread(void *p)
length = atomic_inc_return(&tcpSesAllocCount);
if (length > 1)
- mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
- GFP_KERNEL);
+ mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
set_freezable();
while (server->tcpStatus != CifsExiting) {
@@ -1599,6 +1597,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
pr_warn("CIFS: username too long\n");
goto cifs_parse_mount_err;
}
+
+ kfree(vol->username);
vol->username = kstrdup(string, GFP_KERNEL);
if (!vol->username)
goto cifs_parse_mount_err;
@@ -1700,6 +1700,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
+ kfree(vol->domainname);
vol->domainname = kstrdup(string, GFP_KERNEL);
if (!vol->domainname) {
pr_warn("CIFS: no memory for domainname\n");
@@ -1731,6 +1732,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
}
if (strncasecmp(string, "default", 7) != 0) {
+ kfree(vol->iocharset);
vol->iocharset = kstrdup(string,
GFP_KERNEL);
if (!vol->iocharset) {
@@ -2913,8 +2915,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
* calling name ends in null (byte 16) from old smb
* convention.
*/
- if (server->workstation_RFC1001_name &&
- server->workstation_RFC1001_name[0] != 0)
+ if (server->workstation_RFC1001_name[0] != 0)
rfc1002mangle(ses_init_buf->trailer.
session_req.calling_name,
server->workstation_RFC1001_name,
@@ -3692,6 +3693,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
#endif /* CIFS_WEAK_PW_HASH */
rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
bcc_ptr, nls_codepage);
+ if (rc) {
+ cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
+ __func__, rc);
+ cifs_buf_release(smb_buffer);
+ return rc;
+ }
bcc_ptr += CIFS_AUTH_RESP_SIZE;
if (ses->capabilities & CAP_UNICODE) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a94b3e673182..ca2bc5406306 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1823,6 +1823,7 @@ refind_writable:
cifsFileInfo_put(inv_file);
spin_lock(&cifs_file_list_lock);
++refind;
+ inv_file = NULL;
goto refind_writable;
}
}
@@ -2559,10 +2560,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
return rc;
}
-static ssize_t
-cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
- size_t len;
+ struct file *file = iocb->ki_filp;
ssize_t total_written = 0;
struct cifsFileInfo *open_file;
struct cifs_tcon *tcon;
@@ -2572,15 +2572,15 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
struct iov_iter saved_from;
int rc;
- len = iov_iter_count(from);
- rc = generic_write_checks(file, poffset, &len, 0);
- if (rc)
- return rc;
-
- if (!len)
- return 0;
+ /*
+ * BB - optimize the way when signing is disabled. We can drop this
+ * extra memory-to-memory copying and use iovec buffers for constructing
+ * write request.
+ */
- iov_iter_truncate(from, len);
+ rc = generic_write_checks(iocb, from);
+ if (rc <= 0)
+ return rc;
INIT_LIST_HEAD(&wdata_list);
cifs_sb = CIFS_FILE_SB(file);
@@ -2592,8 +2592,8 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
memcpy(&saved_from, from, sizeof(struct iov_iter));
- rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb,
- &wdata_list);
+ rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from,
+ open_file, cifs_sb, &wdata_list);
/*
* If at least one write was successfully sent, then discard any rc
@@ -2632,7 +2632,7 @@ restart_loop:
memcpy(&tmp_from, &saved_from,
sizeof(struct iov_iter));
iov_iter_advance(&tmp_from,
- wdata->offset - *poffset);
+ wdata->offset - iocb->ki_pos);
rc = cifs_write_from_iter(wdata->offset,
wdata->bytes, &tmp_from,
@@ -2649,34 +2649,13 @@ restart_loop:
kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
- if (total_written > 0)
- *poffset += total_written;
+ if (unlikely(!total_written))
+ return rc;
+ iocb->ki_pos += total_written;
+ set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags);
cifs_stats_bytes_written(tcon, total_written);
- return total_written ? total_written : (ssize_t)rc;
-}
-
-ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
-{
- ssize_t written;
- struct inode *inode;
- loff_t pos = iocb->ki_pos;
-
- inode = file_inode(iocb->ki_filp);
-
- /*
- * BB - optimize the way when signing is disabled. We can drop this
- * extra memory-to-memory copying and use iovec buffers for constructing
- * write request.
- */
-
- written = cifs_iovec_write(iocb->ki_filp, from, &pos);
- if (written > 0) {
- set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
- iocb->ki_pos = pos;
- }
-
- return written;
+ return total_written;
}
static ssize_t
@@ -2687,8 +2666,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file->f_mapping->host;
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
- ssize_t rc = -EACCES;
- loff_t lock_pos = iocb->ki_pos;
+ ssize_t rc;
/*
* We need to hold the sem to be sure nobody modifies lock list
@@ -2696,23 +2674,24 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
*/
down_read(&cinode->lock_sem);
mutex_lock(&inode->i_mutex);
- if (file->f_flags & O_APPEND)
- lock_pos = i_size_read(inode);
- if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
+
+ rc = generic_write_checks(iocb, from);
+ if (rc <= 0)
+ goto out;
+
+ if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
server->vals->exclusive_lock_type, NULL,
- CIFS_WRITE_OP)) {
+ CIFS_WRITE_OP))
rc = __generic_file_write_iter(iocb, from);
- mutex_unlock(&inode->i_mutex);
-
- if (rc > 0) {
- ssize_t err;
+ else
+ rc = -EACCES;
+out:
+ mutex_unlock(&inode->i_mutex);
- err = generic_write_sync(file, iocb->ki_pos - rc, rc);
- if (err < 0)
- rc = err;
- }
- } else {
- mutex_unlock(&inode->i_mutex);
+ if (rc > 0) {
+ ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+ if (err < 0)
+ rc = err;
}
up_read(&cinode->lock_sem);
return rc;
@@ -3876,8 +3855,7 @@ void cifs_oplock_break(struct work_struct *work)
* Direct IO is not yet supported in the cached mode.
*/
static ssize_t
-cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t pos)
+cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
/*
* FIXME
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2d4f37235ed0..3e126d7bb2ea 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
cifs_buf_release(srchinf->ntwrk_buf_start);
}
kfree(srchinf);
+ if (rc)
+ goto cgii_exit;
} else
goto cgii_exit;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 689f035915cf..22dfdf17d065 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
/* return pointer to beginning of data area, ie offset from SMB start */
if ((*off != 0) && (*len != 0))
- return hdr->ProtocolId + *off;
+ return (char *)(&hdr->ProtocolId[0]) + *off;
else
return NULL;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 96b5d40a2ece..eab05e1aa587 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid,
/* No need to change MaxChunks since already set to 1 */
chunk_sizes_updated = true;
- }
+ } else
+ goto cchunk_out;
}
cchunk_out:
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3417340bf89e..65cd7a84c8bc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
struct smb2_ioctl_req *req;
struct smb2_ioctl_rsp *rsp;
struct TCP_Server_Info *server;
- struct cifs_ses *ses = tcon->ses;
+ struct cifs_ses *ses;
struct kvec iov[2];
int resp_buftype;
int num_iovecs;
@@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
if (plen)
*plen = 0;
+ if (tcon)
+ ses = tcon->ses;
+ else
+ return -EIO;
+
if (ses && (ses->server))
server = ses->server;
else
@@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
if ((rc != 0) && (rc != -EINVAL)) {
- if (tcon)
- cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+ cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
goto ioctl_exit;
} else if (rc == -EINVAL) {
if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
(opcode != FSCTL_SRV_COPYCHUNK)) {
- if (tcon)
- cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+ cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
goto ioctl_exit;
}
}
@@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
- if ((rc != 0) && tcon)
+ if (rc != 0)
cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
free_rsp_buf(resp_buftype, iov[0].iov_base);
@@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
struct kvec iov[2];
int rc = 0;
int len;
- int resp_buftype;
+ int resp_buftype = CIFS_NO_BUFFER;
unsigned char *bufptr;
struct TCP_Server_Info *server;
struct cifs_ses *ses = tcon->ses;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index d244d743a232..1da3805f3ddc 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -27,19 +27,14 @@
#include "coda_int.h"
static ssize_t
-coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *ppos)
+coda_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- struct coda_file_info *cfi;
- struct file *host_file;
+ struct file *coda_file = iocb->ki_filp;
+ struct coda_file_info *cfi = CODA_FTOC(coda_file);
- cfi = CODA_FTOC(coda_file);
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
- host_file = cfi->cfi_container;
- if (!host_file->f_op->read)
- return -EINVAL;
-
- return host_file->f_op->read(host_file, buf, count, ppos);
+ return vfs_iter_read(cfi->cfi_container, to, &iocb->ki_pos);
}
static ssize_t
@@ -64,32 +59,25 @@ coda_file_splice_read(struct file *coda_file, loff_t *ppos,
}
static ssize_t
-coda_file_write(struct file *coda_file, const char __user *buf, size_t count, loff_t *ppos)
+coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
{
- struct inode *host_inode, *coda_inode = file_inode(coda_file);
- struct coda_file_info *cfi;
+ struct file *coda_file = iocb->ki_filp;
+ struct inode *coda_inode = file_inode(coda_file);
+ struct coda_file_info *cfi = CODA_FTOC(coda_file);
struct file *host_file;
ssize_t ret;
- cfi = CODA_FTOC(coda_file);
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
- host_file = cfi->cfi_container;
-
- if (!host_file->f_op->write)
- return -EINVAL;
- host_inode = file_inode(host_file);
+ host_file = cfi->cfi_container;
file_start_write(host_file);
mutex_lock(&coda_inode->i_mutex);
-
- ret = host_file->f_op->write(host_file, buf, count, ppos);
-
- coda_inode->i_size = host_inode->i_size;
+ ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
+ coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
mutex_unlock(&coda_inode->i_mutex);
file_end_write(host_file);
-
return ret;
}
@@ -231,8 +219,8 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
const struct file_operations coda_file_operations = {
.llseek = generic_file_llseek,
- .read = coda_file_read,
- .write = coda_file_write,
+ .read_iter = coda_file_read_iter,
+ .write_iter = coda_file_write_iter,
.mmap = coda_file_mmap,
.open = coda_open,
.release = coda_release,
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index afec6450450f..6b8e2f091f5b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -570,6 +570,7 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
#define BNEPCONNDEL _IOW('B', 201, int)
#define BNEPGETCONNLIST _IOR('B', 210, int)
#define BNEPGETCONNINFO _IOR('B', 211, int)
+#define BNEPGETSUPPFEAT _IOR('B', 212, int)
#define CMTPCONNADD _IOW('C', 200, int)
#define CMTPCONNDEL _IOW('C', 201, int)
@@ -1247,6 +1248,7 @@ COMPATIBLE_IOCTL(BNEPCONNADD)
COMPATIBLE_IOCTL(BNEPCONNDEL)
COMPATIBLE_IOCTL(BNEPGETCONNLIST)
COMPATIBLE_IOCTL(BNEPGETCONNINFO)
+COMPATIBLE_IOCTL(BNEPGETSUPPFEAT)
COMPATIBLE_IOCTL(CMTPCONNADD)
COMPATIBLE_IOCTL(CMTPCONNDEL)
COMPATIBLE_IOCTL(CMTPGETCONNLIST)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index cf0db005d2f5..acb3d63bc9dc 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1598,7 +1598,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
if (offset >= 0)
break;
default:
- mutex_unlock(&file_inode(file)->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
return -EINVAL;
}
if (offset != file->f_pos) {
diff --git a/fs/coredump.c b/fs/coredump.c
index f319926ddf8c..bbbe139ab280 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -657,7 +657,7 @@ void do_coredump(const siginfo_t *siginfo)
*/
if (!uid_eq(inode->i_uid, current_fsuid()))
goto close_fail;
- if (!cprm.file->f_op->write)
+ if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
goto close_fail;
if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
goto close_fail;
diff --git a/fs/dax.c b/fs/dax.c
index ed1619ec6537..0bb0aecb556c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -98,9 +98,9 @@ static bool buffer_size_valid(struct buffer_head *bh)
return bh->b_state != 0;
}
-static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
- loff_t start, loff_t end, get_block_t get_block,
- struct buffer_head *bh)
+static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
+ loff_t start, loff_t end, get_block_t get_block,
+ struct buffer_head *bh)
{
ssize_t retval = 0;
loff_t pos = start;
@@ -109,7 +109,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
void *addr;
bool hole = false;
- if (rw != WRITE)
+ if (iov_iter_rw(iter) != WRITE)
end = min(end, i_size_read(inode));
while (pos < end) {
@@ -124,7 +124,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
bh->b_size = PAGE_ALIGN(end - pos);
bh->b_state = 0;
retval = get_block(inode, block, bh,
- rw == WRITE);
+ iov_iter_rw(iter) == WRITE);
if (retval)
break;
if (!buffer_size_valid(bh))
@@ -137,7 +137,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
bh->b_size -= done;
}
- hole = (rw != WRITE) && !buffer_written(bh);
+ hole = iov_iter_rw(iter) != WRITE && !buffer_written(bh);
if (hole) {
addr = NULL;
size = bh->b_size - first;
@@ -154,7 +154,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
max = min(pos + size, end);
}
- if (rw == WRITE)
+ if (iov_iter_rw(iter) == WRITE)
len = copy_from_iter(addr, max - pos, iter);
else if (!hole)
len = copy_to_iter(addr, max - pos, iter);
@@ -173,7 +173,6 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
/**
* dax_do_io - Perform I/O to a DAX file
- * @rw: READ to read or WRITE to write
* @iocb: The control block for this I/O
* @inode: The file which the I/O is directed at
* @iter: The addresses to do I/O from or to
@@ -189,9 +188,9 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter,
* As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
* is in progress.
*/
-ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
- struct iov_iter *iter, loff_t pos,
- get_block_t get_block, dio_iodone_t end_io, int flags)
+ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+ struct iov_iter *iter, loff_t pos, get_block_t get_block,
+ dio_iodone_t end_io, int flags)
{
struct buffer_head bh;
ssize_t retval = -EINVAL;
@@ -199,7 +198,7 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
memset(&bh, 0, sizeof(bh));
- if ((flags & DIO_LOCKING) && (rw == READ)) {
+ if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
struct address_space *mapping = inode->i_mapping;
mutex_lock(&inode->i_mutex);
retval = filemap_write_and_wait_range(mapping, pos, end - 1);
@@ -212,9 +211,9 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode,
/* Protects against truncate */
atomic_inc(&inode->i_dio_count);
- retval = dax_io(rw, inode, iter, pos, end, get_block, &bh);
+ retval = dax_io(inode, iter, pos, end, get_block, &bh);
- if ((flags & DIO_LOCKING) && (rw == READ))
+ if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
mutex_unlock(&inode->i_mutex);
if ((retval > 0) && end_io)
@@ -464,6 +463,23 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
EXPORT_SYMBOL_GPL(dax_fault);
/**
+ * dax_pfn_mkwrite - handle first write to DAX page
+ * @vma: The virtual memory area where the fault occurred
+ * @vmf: The description of the fault
+ *
+ */
+int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+
+ sb_start_pagefault(sb);
+ file_update_time(vma->vm_file);
+ sb_end_pagefault(sb);
+ return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
+
+/**
* dax_zero_page_range - zero a range within a page of a DAX file
* @inode: The file being truncated
* @from: The file offset that is being truncated to
diff --git a/fs/dcache.c b/fs/dcache.c
index c71e3732e53b..656ce522a218 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,6 +269,41 @@ static inline int dname_external(const struct dentry *dentry)
return dentry->d_name.name != dentry->d_iname;
}
+/*
+ * Make sure other CPUs see the inode attached before the type is set.
+ */
+static inline void __d_set_inode_and_type(struct dentry *dentry,
+ struct inode *inode,
+ unsigned type_flags)
+{
+ unsigned flags;
+
+ dentry->d_inode = inode;
+ smp_wmb();
+ flags = READ_ONCE(dentry->d_flags);
+ flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
+ flags |= type_flags;
+ WRITE_ONCE(dentry->d_flags, flags);
+}
+
+/*
+ * Ideally, we want to make sure that other CPUs see the flags cleared before
+ * the inode is detached, but this is really a violation of RCU principles
+ * since the ordering suggests we should always set inode before flags.
+ *
+ * We should instead replace or discard the entire dentry - but that sucks
+ * performancewise on mass deletion/rename.
+ */
+static inline void __d_clear_type_and_inode(struct dentry *dentry)
+{
+ unsigned flags = READ_ONCE(dentry->d_flags);
+
+ flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
+ WRITE_ONCE(dentry->d_flags, flags);
+ smp_wmb();
+ dentry->d_inode = NULL;
+}
+
static void dentry_free(struct dentry *dentry)
{
WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
@@ -311,7 +346,7 @@ static void dentry_iput(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
if (inode) {
- dentry->d_inode = NULL;
+ __d_clear_type_and_inode(dentry);
hlist_del_init(&dentry->d_u.d_alias);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
@@ -335,8 +370,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
__releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
- __d_clear_type(dentry);
- dentry->d_inode = NULL;
+ __d_clear_type_and_inode(dentry);
hlist_del_init(&dentry->d_u.d_alias);
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
@@ -1715,11 +1749,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
unsigned add_flags = d_flags_for_inode(inode);
spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
- dentry->d_flags |= add_flags;
if (inode)
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
- dentry->d_inode = inode;
+ __d_set_inode_and_type(dentry, inode, add_flags);
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
fsnotify_d_instantiate(dentry, inode);
@@ -1937,8 +1969,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
add_flags |= DCACHE_DISCONNECTED;
spin_lock(&tmp->d_lock);
- tmp->d_inode = inode;
- tmp->d_flags |= add_flags;
+ __d_set_inode_and_type(tmp, inode, add_flags);
hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
hlist_bl_lock(&tmp->d_sb->s_anon);
hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
@@ -2690,7 +2721,7 @@ static int __d_unalias(struct inode *inode,
struct dentry *dentry, struct dentry *alias)
{
struct mutex *m1 = NULL, *m2 = NULL;
- int ret = -EBUSY;
+ int ret = -ESTALE;
/* If alias and dentry share a parent, then no extra locks required */
if (alias->d_parent == dentry->d_parent)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 96400ab42d13..c9ee0dfe90b5 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -254,6 +254,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
pr_debug("debugfs: creating file '%s'\n",name);
+ if (IS_ERR(parent))
+ return parent;
+
error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
&debugfs_mount_count);
if (error)
@@ -521,7 +524,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
if (debugfs_positive(dentry)) {
dget(dentry);
- if (S_ISDIR(dentry->d_inode->i_mode))
+ if (d_is_dir(dentry))
ret = simple_rmdir(parent->d_inode, dentry);
else
simple_unlink(parent->d_inode, dentry);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e181b6b2e297..c3b560b24a46 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
#include <linux/uio.h>
#include <linux/atomic.h>
#include <linux/prefetch.h>
-#include <linux/aio.h>
/*
* How many user pages to map in one call to get_user_pages(). This determines
@@ -265,7 +264,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
ret = err;
}
- aio_complete(dio->iocb, ret, 0);
+ dio->iocb->ki_complete(dio->iocb, ret, 0);
}
kmem_cache_free(dio_cache, dio);
@@ -1056,7 +1055,7 @@ static inline int drop_refcount(struct dio *dio)
* operation. AIO can if it was a broken operation described above or
* in fact if all the bios race to complete before we get here. In
* that case dio_complete() translates the EIOCBQUEUED into the proper
- * return code that the caller will hand to aio_complete().
+ * return code that the caller will hand to ->complete().
*
* This is managed by the bio_lock instead of being an atomic_t so that
* completion paths can drop their ref and use the remaining count to
@@ -1094,10 +1093,10 @@ static inline int drop_refcount(struct dio *dio)
* for the whole file.
*/
static inline ssize_t
-do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
- struct block_device *bdev, struct iov_iter *iter, loff_t offset,
- get_block_t get_block, dio_iodone_t end_io,
- dio_submit_t submit_io, int flags)
+do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
+ struct block_device *bdev, struct iov_iter *iter,
+ loff_t offset, get_block_t get_block, dio_iodone_t end_io,
+ dio_submit_t submit_io, int flags)
{
unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
unsigned blkbits = i_blkbits;
@@ -1111,9 +1110,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct blk_plug plug;
unsigned long align = offset | iov_iter_alignment(iter);
- if (rw & WRITE)
- rw = WRITE_ODIRECT;
-
/*
* Avoid references to bdev if not absolutely needed to give
* the early prefetch in the caller enough time.
@@ -1128,7 +1124,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
}
/* watch out for a 0 len io from a tricksy fs */
- if (rw == READ && !iov_iter_count(iter))
+ if (iov_iter_rw(iter) == READ && !iov_iter_count(iter))
return 0;
dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
@@ -1144,7 +1140,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
dio->flags = flags;
if (dio->flags & DIO_LOCKING) {
- if (rw == READ) {
+ if (iov_iter_rw(iter) == READ) {
struct address_space *mapping =
iocb->ki_filp->f_mapping;
@@ -1170,19 +1166,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
if (is_sync_kiocb(iocb))
dio->is_async = false;
else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
- (rw & WRITE) && end > i_size_read(inode))
+ iov_iter_rw(iter) == WRITE && end > i_size_read(inode))
dio->is_async = false;
else
dio->is_async = true;
dio->inode = inode;
- dio->rw = rw;
+ dio->rw = iov_iter_rw(iter) == WRITE ? WRITE_ODIRECT : READ;
/*
* For AIO O_(D)SYNC writes we need to defer completions to a workqueue
* so that we can call ->fsync.
*/
- if (dio->is_async && (rw & WRITE) &&
+ if (dio->is_async && iov_iter_rw(iter) == WRITE &&
((iocb->ki_filp->f_flags & O_DSYNC) ||
IS_SYNC(iocb->ki_filp->f_mapping->host))) {
retval = dio_set_defer_completion(dio);
@@ -1275,7 +1271,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
* we can let i_mutex go now that its achieved its purpose
* of protecting us from looking up uninitialized blocks.
*/
- if (rw == READ && (dio->flags & DIO_LOCKING))
+ if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
mutex_unlock(&dio->inode->i_mutex);
/*
@@ -1287,7 +1283,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
*/
BUG_ON(retval == -EIOCBQUEUED);
if (dio->is_async && retval == 0 && dio->result &&
- (rw == READ || dio->result == count))
+ (iov_iter_rw(iter) == READ || dio->result == count))
retval = -EIOCBQUEUED;
else
dio_await_completion(dio);
@@ -1301,11 +1297,11 @@ out:
return retval;
}
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
- struct block_device *bdev, struct iov_iter *iter, loff_t offset,
- get_block_t get_block, dio_iodone_t end_io,
- dio_submit_t submit_io, int flags)
+ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
+ struct block_device *bdev, struct iov_iter *iter,
+ loff_t offset, get_block_t get_block,
+ dio_iodone_t end_io, dio_submit_t submit_io,
+ int flags)
{
/*
* The block device state is needed in the end to finally
@@ -1319,8 +1315,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
prefetch(bdev->bd_queue);
prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
- return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset,
- get_block, end_io, submit_io, flags);
+ return do_blockdev_direct_IO(iocb, inode, bdev, iter, offset, get_block,
+ end_io, submit_io, flags);
}
EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 90d1882b306f..5ba029e627cc 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -124,7 +124,7 @@ ecryptfs_get_key_payload_data(struct key *key)
}
#define ECRYPTFS_MAX_KEYSET_SIZE 1024
-#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
+#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 31
#define ECRYPTFS_MAX_NUM_ENC_KEYS 64
#define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */
#define ECRYPTFS_SALT_BYTES 2
@@ -237,7 +237,7 @@ struct ecryptfs_crypt_stat {
struct crypto_ablkcipher *tfm;
struct crypto_hash *hash_tfm; /* Crypto context for generating
* the initialization vectors */
- unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+ unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
struct list_head keysig_list;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b07731e68c0b..a65786e26b05 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,7 +31,6 @@
#include <linux/security.h>
#include <linux/compat.h>
#include <linux/fs_stack.h>
-#include <linux/aio.h>
#include "ecryptfs_kernel.h"
/**
@@ -52,12 +51,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
rc = generic_file_read_iter(iocb, to);
- /*
- * Even though this is a async interface, we need to wait
- * for IO to finish to update atime
- */
- if (-EIOCBQUEUED == rc)
- rc = wait_on_sync_kiocb(iocb);
if (rc >= 0) {
path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
touch_atime(path);
@@ -303,9 +296,22 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct file *lower_file = ecryptfs_file_to_lower(file);
long rc = -ENOTTY;
- if (lower_file->f_op->unlocked_ioctl)
+ if (!lower_file->f_op->unlocked_ioctl)
+ return rc;
+
+ switch (cmd) {
+ case FITRIM:
+ case FS_IOC_GETFLAGS:
+ case FS_IOC_SETFLAGS:
+ case FS_IOC_GETVERSION:
+ case FS_IOC_SETVERSION:
rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
- return rc;
+ fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+
+ return rc;
+ default:
+ return rc;
+ }
}
#ifdef CONFIG_COMPAT
@@ -315,9 +321,22 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct file *lower_file = ecryptfs_file_to_lower(file);
long rc = -ENOIOCTLCMD;
- if (lower_file->f_op->compat_ioctl)
+ if (!lower_file->f_op->compat_ioctl)
+ return rc;
+
+ switch (cmd) {
+ case FITRIM:
+ case FS_IOC32_GETFLAGS:
+ case FS_IOC32_SETFLAGS:
+ case FS_IOC32_GETVERSION:
+ case FS_IOC32_SETVERSION:
rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
- return rc;
+ fsstack_copy_attr_all(file_inode(file), file_inode(lower_file));
+
+ return rc;
+ default:
+ return rc;
+ }
}
#endif
@@ -339,9 +358,7 @@ const struct file_operations ecryptfs_dir_fops = {
const struct file_operations ecryptfs_main_fops = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = ecryptfs_read_update_atime,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.iterate = ecryptfs_readdir,
.unlocked_ioctl = ecryptfs_unlocked_ioctl,
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 917bd5c9776a..6bd67e2011f0 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -891,7 +891,7 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
struct blkcipher_desc desc;
char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
char iv[ECRYPTFS_MAX_IV_BYTES];
- char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
+ char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
};
/**
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1895d60f4122..c095d3264259 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -407,7 +407,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
if (!cipher_name_set) {
int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
- BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+ BUG_ON(cipher_name_len > ECRYPTFS_MAX_CIPHER_NAME_SIZE);
strcpy(mount_crypt_stat->global_default_cipher_name,
ECRYPTFS_DEFAULT_CIPHER);
}
diff --git a/fs/exec.c b/fs/exec.c
index c7f9b733406d..49a1c61433b7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -926,10 +926,14 @@ static int de_thread(struct task_struct *tsk)
if (!thread_group_leader(tsk)) {
struct task_struct *leader = tsk->group_leader;
- sig->notify_count = -1; /* for exit_notify() */
for (;;) {
threadgroup_change_begin(tsk);
write_lock_irq(&tasklist_lock);
+ /*
+ * Do this under tasklist_lock to ensure that
+ * exit_notify() can't miss ->group_exit_task
+ */
+ sig->notify_count = -1;
if (likely(leader->exit_state))
break;
__set_current_state(TASK_KILLABLE);
@@ -1078,7 +1082,13 @@ int flush_old_exec(struct linux_binprm * bprm)
if (retval)
goto out;
+ /*
+ * Must be called _before_ exec_mmap() as bprm->mm is
+ * not visibile until then. This also enables the update
+ * to be lockless.
+ */
set_mm_exe_file(bprm->mm, bprm->file);
+
/*
* Release all of the old mmap stuff
*/
@@ -1265,6 +1275,53 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
spin_unlock(&p->fs->lock);
}
+static void bprm_fill_uid(struct linux_binprm *bprm)
+{
+ struct inode *inode;
+ unsigned int mode;
+ kuid_t uid;
+ kgid_t gid;
+
+ /* clear any previous set[ug]id data from a previous binary */
+ bprm->cred->euid = current_euid();
+ bprm->cred->egid = current_egid();
+
+ if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+ return;
+
+ if (task_no_new_privs(current))
+ return;
+
+ inode = file_inode(bprm->file);
+ mode = READ_ONCE(inode->i_mode);
+ if (!(mode & (S_ISUID|S_ISGID)))
+ return;
+
+ /* Be careful if suid/sgid is set */
+ mutex_lock(&inode->i_mutex);
+
+ /* reload atomically mode/uid/gid now that lock held */
+ mode = inode->i_mode;
+ uid = inode->i_uid;
+ gid = inode->i_gid;
+ mutex_unlock(&inode->i_mutex);
+
+ /* We ignore suid/sgid if there are no mappings for them in the ns */
+ if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
+ !kgid_has_mapping(bprm->cred->user_ns, gid))
+ return;
+
+ if (mode & S_ISUID) {
+ bprm->per_clear |= PER_CLEAR_ON_SETID;
+ bprm->cred->euid = uid;
+ }
+
+ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+ bprm->per_clear |= PER_CLEAR_ON_SETID;
+ bprm->cred->egid = gid;
+ }
+}
+
/*
* Fill the binprm structure from the inode.
* Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
@@ -1273,36 +1330,9 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
*/
int prepare_binprm(struct linux_binprm *bprm)
{
- struct inode *inode = file_inode(bprm->file);
- umode_t mode = inode->i_mode;
int retval;
-
- /* clear any previous set[ug]id data from a previous binary */
- bprm->cred->euid = current_euid();
- bprm->cred->egid = current_egid();
-
- if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
- !task_no_new_privs(current) &&
- kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
- kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
- /* Set-uid? */
- if (mode & S_ISUID) {
- bprm->per_clear |= PER_CLEAR_ON_SETID;
- bprm->cred->euid = inode->i_uid;
- }
-
- /* Set-gid? */
- /*
- * If setgid is set but no group execute bit then this
- * is a candidate for mandatory locking, not a setgid
- * executable.
- */
- if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
- bprm->per_clear |= PER_CLEAR_ON_SETID;
- bprm->cred->egid = inode->i_gid;
- }
- }
+ bprm_fill_uid(bprm);
/* fill in binprm security blob */
retval = security_bprm_set_creds(bprm);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 1a376b42d305..906de66e8e7e 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -67,8 +67,6 @@ static int exofs_flush(struct file *file, fl_owner_t id)
const struct file_operations exofs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a198e94813fe..35073aaec6e0 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -963,8 +963,8 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset,
/* TODO: Should be easy enough to do proprly */
-static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
return 0;
}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 678f9ab08c48..8d15febd0aa3 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -793,7 +793,6 @@ extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
int datasync);
extern const struct inode_operations ext2_file_inode_operations;
extern const struct file_operations ext2_file_operations;
-extern const struct file_operations ext2_dax_file_operations;
/* inode.c */
extern const struct address_space_operations ext2_aops;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index e31701713516..3a0a6c6406d0 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -39,6 +39,7 @@ static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
static const struct vm_operations_struct ext2_dax_vm_ops = {
.fault = ext2_dax_fault,
.page_mkwrite = ext2_dax_mkwrite,
+ .pfn_mkwrite = dax_pfn_mkwrite,
};
static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -92,8 +93,6 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
*/
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.unlocked_ioctl = ext2_ioctl,
@@ -108,24 +107,6 @@ const struct file_operations ext2_file_operations = {
.splice_write = iter_file_splice_write,
};
-#ifdef CONFIG_FS_DAX
-const struct file_operations ext2_dax_file_operations = {
- .llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
- .read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
- .unlocked_ioctl = ext2_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext2_compat_ioctl,
-#endif
- .mmap = ext2_file_mmap,
- .open = dquot_file_open,
- .release = ext2_release_file,
- .fsync = ext2_fsync,
-};
-#endif
-
const struct inode_operations ext2_file_inode_operations = {
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6434bc000125..5d9213963fae 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,7 +31,7 @@
#include <linux/mpage.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "ext2.h"
#include "acl.h"
#include "xattr.h"
@@ -851,8 +851,7 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
}
static ssize_t
-ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
+ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -861,12 +860,12 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
ssize_t ret;
if (IS_DAX(inode))
- ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block,
- NULL, DIO_LOCKING);
+ ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL,
+ DIO_LOCKING);
else
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+ ret = blockdev_direct_IO(iocb, inode, iter, offset,
ext2_get_block);
- if (ret < 0 && (rw & WRITE))
+ if (ret < 0 && iov_iter_rw(iter) == WRITE)
ext2_write_failed(mapping, offset + count);
return ret;
}
@@ -1388,10 +1387,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext2_file_inode_operations;
- if (test_opt(inode->i_sb, DAX)) {
- inode->i_mapping->a_ops = &ext2_aops;
- inode->i_fop = &ext2_dax_file_operations;
- } else if (test_opt(inode->i_sb, NOBH)) {
+ if (test_opt(inode->i_sb, NOBH)) {
inode->i_mapping->a_ops = &ext2_nobh_aops;
inode->i_fop = &ext2_file_operations;
} else {
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 148f6e3789ea..ce422931f411 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,10 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
return PTR_ERR(inode);
inode->i_op = &ext2_file_inode_operations;
- if (test_opt(inode->i_sb, DAX)) {
- inode->i_mapping->a_ops = &ext2_aops;
- inode->i_fop = &ext2_dax_file_operations;
- } else if (test_opt(inode->i_sb, NOBH)) {
+ if (test_opt(inode->i_sb, NOBH)) {
inode->i_mapping->a_ops = &ext2_nobh_aops;
inode->i_fop = &ext2_file_operations;
} else {
@@ -125,10 +122,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
return PTR_ERR(inode);
inode->i_op = &ext2_file_inode_operations;
- if (test_opt(inode->i_sb, DAX)) {
- inode->i_mapping->a_ops = &ext2_aops;
- inode->i_fop = &ext2_dax_file_operations;
- } else if (test_opt(inode->i_sb, NOBH)) {
+ if (test_opt(inode->i_sb, NOBH)) {
inode->i_mapping->a_ops = &ext2_nobh_aops;
inode->i_fop = &ext2_file_operations;
} else {
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index a062fa1e1b11..3b8f650de22c 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -50,8 +50,6 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
const struct file_operations ext3_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.unlocked_ioctl = ext3_ioctl,
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc49ba27..13c0868c7160 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,7 +27,7 @@
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "ext3.h"
#include "xattr.h"
#include "acl.h"
@@ -1820,8 +1820,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
* crashes then stale disk data _may_ be exposed inside the file. But current
* VFS code falls back into buffered path in that case so we are safe.
*/
-static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t ext3_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -1832,9 +1832,9 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
int retries = 0;
- trace_ext3_direct_IO_enter(inode, offset, count, rw);
+ trace_ext3_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
- if (rw == WRITE) {
+ if (iov_iter_rw(iter) == WRITE) {
loff_t final_size = offset + count;
if (final_size > inode->i_size) {
@@ -1856,12 +1856,12 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
}
retry:
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block);
+ ret = blockdev_direct_IO(iocb, inode, iter, offset, ext3_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
*/
- if (unlikely((rw & WRITE) && ret < 0)) {
+ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
loff_t isize = i_size_read(inode);
loff_t end = offset + count;
@@ -1908,7 +1908,7 @@ retry:
ret = err;
}
out:
- trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
+ trace_ext3_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
return ret;
}
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index d4dbf3c259b3..f037b4b27300 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -789,7 +789,7 @@ static const struct quotactl_ops ext3_qctl_operations = {
.quota_on = ext3_quota_on,
.quota_off = dquot_quota_off,
.quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
+ .get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
.set_dqblk = dquot_set_dqblk
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index c6874be6d58b..24215dc09a18 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -546,8 +546,7 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s)
free += EXT3_XATTR_LEN(name_len);
}
if (i->value) {
- if (free < EXT3_XATTR_SIZE(i->value_len) ||
- free < EXT3_XATTR_LEN(name_len) +
+ if (free < EXT3_XATTR_LEN(name_len) +
EXT3_XATTR_SIZE(i->value_len))
return -ENOSPC;
}
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index efea5d5c44ce..18228c201f7f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -64,6 +64,23 @@ config EXT4_FS_SECURITY
If you are not using a security module that requires using
extended attributes for file security labels, say N.
+config EXT4_FS_ENCRYPTION
+ bool "Ext4 Encryption"
+ depends on EXT4_FS
+ select CRYPTO_AES
+ select CRYPTO_CBC
+ select CRYPTO_ECB
+ select CRYPTO_XTS
+ select CRYPTO_CTS
+ select CRYPTO_SHA256
+ select KEYS
+ select ENCRYPTED_KEYS
+ help
+ Enable encryption of ext4 files and directories. This
+ feature is similar to ecryptfs, but it is more memory
+ efficient since it avoids caching the encrypted and
+ decrypted pages in the page cache.
+
config EXT4_DEBUG
bool "EXT4 debugging support"
depends on EXT4_FS
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 0310fec2ee3d..75285ea9aa05 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -8,7 +8,9 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
- xattr_trusted.o inline.o
+ xattr_trusted.o inline.o readpage.o
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
+ext4-$(CONFIG_EXT4_FS_ENCRYPTION) += crypto_policy.o crypto.o \
+ crypto_key.o crypto_fname.o
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index d40c8dbbb0d6..69b1e73026a5 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -4,11 +4,6 @@
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
*/
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 83a6f497c4e0..955bf49a7945 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -14,7 +14,6 @@
#include <linux/time.h>
#include <linux/capability.h>
#include <linux/fs.h>
-#include <linux/jbd2.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include "ext4.h"
@@ -641,8 +640,6 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
* fail EDQUOT for metdata, but we do account for it.
*/
if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
dquot_alloc_block_nofail(inode,
EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
}
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index b610779a958c..4a606afb171f 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -8,7 +8,6 @@
*/
#include <linux/buffer_head.h>
-#include <linux/jbd2.h>
#include "ext4.h"
unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 41eb9dcfac7e..3522340c7a99 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
-#include <linux/mutex.h>
#include <linux/slab.h>
#include "ext4.h"
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
new file mode 100644
index 000000000000..8ff15273ab0c
--- /dev/null
+++ b/fs/ext4/crypto.c
@@ -0,0 +1,558 @@
+/*
+ * linux/fs/ext4/crypto.c
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains encryption functions for ext4
+ *
+ * Written by Michael Halcrow, 2014.
+ *
+ * Filename encryption additions
+ * Uday Savagaonkar, 2014
+ * Encryption policy handling additions
+ * Ildar Muslukhov, 2014
+ *
+ * This has not yet undergone a rigorous security audit.
+ *
+ * The usage of AES-XTS should conform to recommendations in NIST
+ * Special Publication 800-38E and IEEE P1619/D16.
+ */
+
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <keys/user-type.h>
+#include <keys/encrypted-type.h>
+#include <linux/crypto.h>
+#include <linux/ecryptfs.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/key.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock_types.h>
+
+#include "ext4_extents.h"
+#include "xattr.h"
+
+/* Encryption added and removed here! (L: */
+
+static unsigned int num_prealloc_crypto_pages = 32;
+static unsigned int num_prealloc_crypto_ctxs = 128;
+
+module_param(num_prealloc_crypto_pages, uint, 0444);
+MODULE_PARM_DESC(num_prealloc_crypto_pages,
+ "Number of crypto pages to preallocate");
+module_param(num_prealloc_crypto_ctxs, uint, 0444);
+MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
+ "Number of crypto contexts to preallocate");
+
+static mempool_t *ext4_bounce_page_pool;
+
+static LIST_HEAD(ext4_free_crypto_ctxs);
+static DEFINE_SPINLOCK(ext4_crypto_ctx_lock);
+
+/**
+ * ext4_release_crypto_ctx() - Releases an encryption context
+ * @ctx: The encryption context to release.
+ *
+ * If the encryption context was allocated from the pre-allocated pool, returns
+ * it to that pool. Else, frees it.
+ *
+ * If there's a bounce page in the context, this frees that.
+ */
+void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
+{
+ unsigned long flags;
+
+ if (ctx->bounce_page) {
+ if (ctx->flags & EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL)
+ __free_page(ctx->bounce_page);
+ else
+ mempool_free(ctx->bounce_page, ext4_bounce_page_pool);
+ ctx->bounce_page = NULL;
+ }
+ ctx->control_page = NULL;
+ if (ctx->flags & EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL) {
+ if (ctx->tfm)
+ crypto_free_tfm(ctx->tfm);
+ kfree(ctx);
+ } else {
+ spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
+ list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
+ spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
+ }
+}
+
+/**
+ * ext4_alloc_and_init_crypto_ctx() - Allocates and inits an encryption context
+ * @mask: The allocation mask.
+ *
+ * Return: An allocated and initialized encryption context on success. An error
+ * value or NULL otherwise.
+ */
+static struct ext4_crypto_ctx *ext4_alloc_and_init_crypto_ctx(gfp_t mask)
+{
+ struct ext4_crypto_ctx *ctx = kzalloc(sizeof(struct ext4_crypto_ctx),
+ mask);
+
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+ return ctx;
+}
+
+/**
+ * ext4_get_crypto_ctx() - Gets an encryption context
+ * @inode: The inode for which we are doing the crypto
+ *
+ * Allocates and initializes an encryption context.
+ *
+ * Return: An allocated and initialized encryption context on success; error
+ * value or NULL otherwise.
+ */
+struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
+{
+ struct ext4_crypto_ctx *ctx = NULL;
+ int res = 0;
+ unsigned long flags;
+ struct ext4_encryption_key *key = &EXT4_I(inode)->i_encryption_key;
+
+ if (!ext4_read_workqueue)
+ ext4_init_crypto();
+
+ /*
+ * We first try getting the ctx from a free list because in
+ * the common case the ctx will have an allocated and
+ * initialized crypto tfm, so it's probably a worthwhile
+ * optimization. For the bounce page, we first try getting it
+ * from the kernel allocator because that's just about as fast
+ * as getting it from a list and because a cache of free pages
+ * should generally be a "last resort" option for a filesystem
+ * to be able to do its job.
+ */
+ spin_lock_irqsave(&ext4_crypto_ctx_lock, flags);
+ ctx = list_first_entry_or_null(&ext4_free_crypto_ctxs,
+ struct ext4_crypto_ctx, free_list);
+ if (ctx)
+ list_del(&ctx->free_list);
+ spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
+ if (!ctx) {
+ ctx = ext4_alloc_and_init_crypto_ctx(GFP_NOFS);
+ if (IS_ERR(ctx)) {
+ res = PTR_ERR(ctx);
+ goto out;
+ }
+ ctx->flags |= EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
+ } else {
+ ctx->flags &= ~EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL;
+ }
+
+ /* Allocate a new Crypto API context if we don't already have
+ * one or if it isn't the right mode. */
+ BUG_ON(key->mode == EXT4_ENCRYPTION_MODE_INVALID);
+ if (ctx->tfm && (ctx->mode != key->mode)) {
+ crypto_free_tfm(ctx->tfm);
+ ctx->tfm = NULL;
+ ctx->mode = EXT4_ENCRYPTION_MODE_INVALID;
+ }
+ if (!ctx->tfm) {
+ switch (key->mode) {
+ case EXT4_ENCRYPTION_MODE_AES_256_XTS:
+ ctx->tfm = crypto_ablkcipher_tfm(
+ crypto_alloc_ablkcipher("xts(aes)", 0, 0));
+ break;
+ case EXT4_ENCRYPTION_MODE_AES_256_GCM:
+ /* TODO(mhalcrow): AEAD w/ gcm(aes);
+ * crypto_aead_setauthsize() */
+ ctx->tfm = ERR_PTR(-ENOTSUPP);
+ break;
+ default:
+ BUG();
+ }
+ if (IS_ERR_OR_NULL(ctx->tfm)) {
+ res = PTR_ERR(ctx->tfm);
+ ctx->tfm = NULL;
+ goto out;
+ }
+ ctx->mode = key->mode;
+ }
+ BUG_ON(key->size != ext4_encryption_key_size(key->mode));
+
+ /* There shouldn't be a bounce page attached to the crypto
+ * context at this point. */
+ BUG_ON(ctx->bounce_page);
+
+out:
+ if (res) {
+ if (!IS_ERR_OR_NULL(ctx))
+ ext4_release_crypto_ctx(ctx);
+ ctx = ERR_PTR(res);
+ }
+ return ctx;
+}
+
+struct workqueue_struct *ext4_read_workqueue;
+static DEFINE_MUTEX(crypto_init);
+
+/**
+ * ext4_exit_crypto() - Shutdown the ext4 encryption system
+ */
+void ext4_exit_crypto(void)
+{
+ struct ext4_crypto_ctx *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &ext4_free_crypto_ctxs, free_list) {
+ if (pos->bounce_page) {
+ if (pos->flags &
+ EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL) {
+ __free_page(pos->bounce_page);
+ } else {
+ mempool_free(pos->bounce_page,
+ ext4_bounce_page_pool);
+ }
+ }
+ if (pos->tfm)
+ crypto_free_tfm(pos->tfm);
+ kfree(pos);
+ }
+ INIT_LIST_HEAD(&ext4_free_crypto_ctxs);
+ if (ext4_bounce_page_pool)
+ mempool_destroy(ext4_bounce_page_pool);
+ ext4_bounce_page_pool = NULL;
+ if (ext4_read_workqueue)
+ destroy_workqueue(ext4_read_workqueue);
+ ext4_read_workqueue = NULL;
+}
+
+/**
+ * ext4_init_crypto() - Set up for ext4 encryption.
+ *
+ * We only call this when we start accessing encrypted files, since it
+ * results in memory getting allocated that wouldn't otherwise be used.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int ext4_init_crypto(void)
+{
+ int i, res;
+
+ mutex_lock(&crypto_init);
+ if (ext4_read_workqueue)
+ goto already_initialized;
+ ext4_read_workqueue = alloc_workqueue("ext4_crypto", WQ_HIGHPRI, 0);
+ if (!ext4_read_workqueue) {
+ res = -ENOMEM;
+ goto fail;
+ }
+
+ for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
+ struct ext4_crypto_ctx *ctx;
+
+ ctx = ext4_alloc_and_init_crypto_ctx(GFP_KERNEL);
+ if (IS_ERR(ctx)) {
+ res = PTR_ERR(ctx);
+ goto fail;
+ }
+ list_add(&ctx->free_list, &ext4_free_crypto_ctxs);
+ }
+
+ ext4_bounce_page_pool =
+ mempool_create_page_pool(num_prealloc_crypto_pages, 0);
+ if (!ext4_bounce_page_pool) {
+ res = -ENOMEM;
+ goto fail;
+ }
+already_initialized:
+ mutex_unlock(&crypto_init);
+ return 0;
+fail:
+ ext4_exit_crypto();
+ mutex_unlock(&crypto_init);
+ return res;
+}
+
+void ext4_restore_control_page(struct page *data_page)
+{
+ struct ext4_crypto_ctx *ctx =
+ (struct ext4_crypto_ctx *)page_private(data_page);
+
+ set_page_private(data_page, (unsigned long)NULL);
+ ClearPagePrivate(data_page);
+ unlock_page(data_page);
+ ext4_release_crypto_ctx(ctx);
+}
+
+/**
+ * ext4_crypt_complete() - The completion callback for page encryption
+ * @req: The asynchronous encryption request context
+ * @res: The result of the encryption operation
+ */
+static void ext4_crypt_complete(struct crypto_async_request *req, int res)
+{
+ struct ext4_completion_result *ecr = req->data;
+
+ if (res == -EINPROGRESS)
+ return;
+ ecr->res = res;
+ complete(&ecr->completion);
+}
+
+typedef enum {
+ EXT4_DECRYPT = 0,
+ EXT4_ENCRYPT,
+} ext4_direction_t;
+
+static int ext4_page_crypto(struct ext4_crypto_ctx *ctx,
+ struct inode *inode,
+ ext4_direction_t rw,
+ pgoff_t index,
+ struct page *src_page,
+ struct page *dest_page)
+
+{
+ u8 xts_tweak[EXT4_XTS_TWEAK_SIZE];
+ struct ablkcipher_request *req = NULL;
+ DECLARE_EXT4_COMPLETION_RESULT(ecr);
+ struct scatterlist dst, src;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct crypto_ablkcipher *atfm = __crypto_ablkcipher_cast(ctx->tfm);
+ int res = 0;
+
+ BUG_ON(!ctx->tfm);
+ BUG_ON(ctx->mode != ei->i_encryption_key.mode);
+
+ if (ctx->mode != EXT4_ENCRYPTION_MODE_AES_256_XTS) {
+ printk_ratelimited(KERN_ERR
+ "%s: unsupported crypto algorithm: %d\n",
+ __func__, ctx->mode);
+ return -ENOTSUPP;
+ }
+
+ crypto_ablkcipher_clear_flags(atfm, ~0);
+ crypto_tfm_set_flags(ctx->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+
+ res = crypto_ablkcipher_setkey(atfm, ei->i_encryption_key.raw,
+ ei->i_encryption_key.size);
+ if (res) {
+ printk_ratelimited(KERN_ERR
+ "%s: crypto_ablkcipher_setkey() failed\n",
+ __func__);
+ return res;
+ }
+ req = ablkcipher_request_alloc(atfm, GFP_NOFS);
+ if (!req) {
+ printk_ratelimited(KERN_ERR
+ "%s: crypto_request_alloc() failed\n",
+ __func__);
+ return -ENOMEM;
+ }
+ ablkcipher_request_set_callback(
+ req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ ext4_crypt_complete, &ecr);
+
+ BUILD_BUG_ON(EXT4_XTS_TWEAK_SIZE < sizeof(index));
+ memcpy(xts_tweak, &index, sizeof(index));
+ memset(&xts_tweak[sizeof(index)], 0,
+ EXT4_XTS_TWEAK_SIZE - sizeof(index));
+
+ sg_init_table(&dst, 1);
+ sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
+ sg_init_table(&src, 1);
+ sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
+ ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
+ xts_tweak);
+ if (rw == EXT4_DECRYPT)
+ res = crypto_ablkcipher_decrypt(req);
+ else
+ res = crypto_ablkcipher_encrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+ ablkcipher_request_free(req);
+ if (res) {
+ printk_ratelimited(
+ KERN_ERR
+ "%s: crypto_ablkcipher_encrypt() returned %d\n",
+ __func__, res);
+ return res;
+ }
+ return 0;
+}
+
+/**
+ * ext4_encrypt() - Encrypts a page
+ * @inode: The inode for which the encryption should take place
+ * @plaintext_page: The page to encrypt. Must be locked.
+ *
+ * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
+ * encryption context.
+ *
+ * Called on the page write path. The caller must call
+ * ext4_restore_control_page() on the returned ciphertext page to
+ * release the bounce buffer and the encryption context.
+ *
+ * Return: An allocated page with the encrypted content on success. Else, an
+ * error value or NULL.
+ */
+struct page *ext4_encrypt(struct inode *inode,
+ struct page *plaintext_page)
+{
+ struct ext4_crypto_ctx *ctx;
+ struct page *ciphertext_page = NULL;
+ int err;
+
+ BUG_ON(!PageLocked(plaintext_page));
+
+ ctx = ext4_get_crypto_ctx(inode);
+ if (IS_ERR(ctx))
+ return (struct page *) ctx;
+
+ /* The encryption operation will require a bounce page. */
+ ciphertext_page = alloc_page(GFP_NOFS);
+ if (!ciphertext_page) {
+ /* This is a potential bottleneck, but at least we'll have
+ * forward progress. */
+ ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
+ GFP_NOFS);
+ if (WARN_ON_ONCE(!ciphertext_page)) {
+ ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
+ GFP_NOFS | __GFP_WAIT);
+ }
+ ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
+ } else {
+ ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
+ }
+ ctx->bounce_page = ciphertext_page;
+ ctx->control_page = plaintext_page;
+ err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, plaintext_page->index,
+ plaintext_page, ciphertext_page);
+ if (err) {
+ ext4_release_crypto_ctx(ctx);
+ return ERR_PTR(err);
+ }
+ SetPagePrivate(ciphertext_page);
+ set_page_private(ciphertext_page, (unsigned long)ctx);
+ lock_page(ciphertext_page);
+ return ciphertext_page;
+}
+
+/**
+ * ext4_decrypt() - Decrypts a page in-place
+ * @ctx: The encryption context.
+ * @page: The page to decrypt. Must be locked.
+ *
+ * Decrypts page in-place using the ctx encryption context.
+ *
+ * Called from the read completion callback.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+
+ return ext4_page_crypto(ctx, page->mapping->host,
+ EXT4_DECRYPT, page->index, page, page);
+}
+
+/*
+ * Convenience function which takes care of allocating and
+ * deallocating the encryption context
+ */
+int ext4_decrypt_one(struct inode *inode, struct page *page)
+{
+ int ret;
+
+ struct ext4_crypto_ctx *ctx = ext4_get_crypto_ctx(inode);
+
+ if (!ctx)
+ return -ENOMEM;
+ ret = ext4_decrypt(ctx, page);
+ ext4_release_crypto_ctx(ctx);
+ return ret;
+}
+
+int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+{
+ struct ext4_crypto_ctx *ctx;
+ struct page *ciphertext_page = NULL;
+ struct bio *bio;
+ ext4_lblk_t lblk = ex->ee_block;
+ ext4_fsblk_t pblk = ext4_ext_pblock(ex);
+ unsigned int len = ext4_ext_get_actual_len(ex);
+ int err = 0;
+
+ BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
+
+ ctx = ext4_get_crypto_ctx(inode);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ciphertext_page = alloc_page(GFP_NOFS);
+ if (!ciphertext_page) {
+ /* This is a potential bottleneck, but at least we'll have
+ * forward progress. */
+ ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
+ GFP_NOFS);
+ if (WARN_ON_ONCE(!ciphertext_page)) {
+ ciphertext_page = mempool_alloc(ext4_bounce_page_pool,
+ GFP_NOFS | __GFP_WAIT);
+ }
+ ctx->flags &= ~EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
+ } else {
+ ctx->flags |= EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL;
+ }
+ ctx->bounce_page = ciphertext_page;
+
+ while (len--) {
+ err = ext4_page_crypto(ctx, inode, EXT4_ENCRYPT, lblk,
+ ZERO_PAGE(0), ciphertext_page);
+ if (err)
+ goto errout;
+
+ bio = bio_alloc(GFP_KERNEL, 1);
+ if (!bio) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ bio->bi_bdev = inode->i_sb->s_bdev;
+ bio->bi_iter.bi_sector = pblk;
+ err = bio_add_page(bio, ciphertext_page,
+ inode->i_sb->s_blocksize, 0);
+ if (err) {
+ bio_put(bio);
+ goto errout;
+ }
+ err = submit_bio_wait(WRITE, bio);
+ if (err)
+ goto errout;
+ }
+ err = 0;
+errout:
+ ext4_release_crypto_ctx(ctx);
+ return err;
+}
+
+bool ext4_valid_contents_enc_mode(uint32_t mode)
+{
+ return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS);
+}
+
+/**
+ * ext4_validate_encryption_key_size() - Validate the encryption key size
+ * @mode: The key mode.
+ * @size: The key size to validate.
+ *
+ * Return: The validated key size for @mode. Zero if invalid.
+ */
+uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
+{
+ if (size == ext4_encryption_key_size(mode))
+ return size;
+ return 0;
+}
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
new file mode 100644
index 000000000000..ca2f5948c1ac
--- /dev/null
+++ b/fs/ext4/crypto_fname.c
@@ -0,0 +1,709 @@
+/*
+ * linux/fs/ext4/crypto_fname.c
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains functions for filename crypto management in ext4
+ *
+ * Written by Uday Savagaonkar, 2014.
+ *
+ * This has not yet undergone a rigorous security audit.
+ *
+ */
+
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <keys/encrypted-type.h>
+#include <keys/user-type.h>
+#include <linux/crypto.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/key.h>
+#include <linux/key.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock_types.h>
+
+#include "ext4.h"
+#include "ext4_crypto.h"
+#include "xattr.h"
+
+/**
+ * ext4_dir_crypt_complete() -
+ */
+static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res)
+{
+ struct ext4_completion_result *ecr = req->data;
+
+ if (res == -EINPROGRESS)
+ return;
+ ecr->res = res;
+ complete(&ecr->completion);
+}
+
+bool ext4_valid_filenames_enc_mode(uint32_t mode)
+{
+ return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS);
+}
+
+/**
+ * ext4_fname_encrypt() -
+ *
+ * This function encrypts the input filename, and returns the length of the
+ * ciphertext. Errors are returned as negative numbers. We trust the caller to
+ * allocate sufficient memory to oname string.
+ */
+static int ext4_fname_encrypt(struct ext4_fname_crypto_ctx *ctx,
+ const struct qstr *iname,
+ struct ext4_str *oname)
+{
+ u32 ciphertext_len;
+ struct ablkcipher_request *req = NULL;
+ DECLARE_EXT4_COMPLETION_RESULT(ecr);
+ struct crypto_ablkcipher *tfm = ctx->ctfm;
+ int res = 0;
+ char iv[EXT4_CRYPTO_BLOCK_SIZE];
+ struct scatterlist sg[1];
+ char *workbuf;
+
+ if (iname->len <= 0 || iname->len > ctx->lim)
+ return -EIO;
+
+ ciphertext_len = (iname->len < EXT4_CRYPTO_BLOCK_SIZE) ?
+ EXT4_CRYPTO_BLOCK_SIZE : iname->len;
+ ciphertext_len = (ciphertext_len > ctx->lim)
+ ? ctx->lim : ciphertext_len;
+
+ /* Allocate request */
+ req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ if (!req) {
+ printk_ratelimited(
+ KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
+ return -ENOMEM;
+ }
+ ablkcipher_request_set_callback(req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ ext4_dir_crypt_complete, &ecr);
+
+ /* Map the workpage */
+ workbuf = kmap(ctx->workpage);
+
+ /* Copy the input */
+ memcpy(workbuf, iname->name, iname->len);
+ if (iname->len < ciphertext_len)
+ memset(workbuf + iname->len, 0, ciphertext_len - iname->len);
+
+ /* Initialize IV */
+ memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
+
+ /* Create encryption request */
+ sg_init_table(sg, 1);
+ sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
+ ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
+ res = crypto_ablkcipher_encrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+ if (res >= 0) {
+ /* Copy the result to output */
+ memcpy(oname->name, workbuf, ciphertext_len);
+ res = ciphertext_len;
+ }
+ kunmap(ctx->workpage);
+ ablkcipher_request_free(req);
+ if (res < 0) {
+ printk_ratelimited(
+ KERN_ERR "%s: Error (error code %d)\n", __func__, res);
+ }
+ oname->len = ciphertext_len;
+ return res;
+}
+
+/*
+ * ext4_fname_decrypt()
+ * This function decrypts the input filename, and returns
+ * the length of the plaintext.
+ * Errors are returned as negative numbers.
+ * We trust the caller to allocate sufficient memory to oname string.
+ */
+static int ext4_fname_decrypt(struct ext4_fname_crypto_ctx *ctx,
+ const struct ext4_str *iname,
+ struct ext4_str *oname)
+{
+ struct ext4_str tmp_in[2], tmp_out[1];
+ struct ablkcipher_request *req = NULL;
+ DECLARE_EXT4_COMPLETION_RESULT(ecr);
+ struct scatterlist sg[1];
+ struct crypto_ablkcipher *tfm = ctx->ctfm;
+ int res = 0;
+ char iv[EXT4_CRYPTO_BLOCK_SIZE];
+ char *workbuf;
+
+ if (iname->len <= 0 || iname->len > ctx->lim)
+ return -EIO;
+
+ tmp_in[0].name = iname->name;
+ tmp_in[0].len = iname->len;
+ tmp_out[0].name = oname->name;
+
+ /* Allocate request */
+ req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ if (!req) {
+ printk_ratelimited(
+ KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
+ return -ENOMEM;
+ }
+ ablkcipher_request_set_callback(req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ ext4_dir_crypt_complete, &ecr);
+
+ /* Map the workpage */
+ workbuf = kmap(ctx->workpage);
+
+ /* Copy the input */
+ memcpy(workbuf, iname->name, iname->len);
+
+ /* Initialize IV */
+ memset(iv, 0, EXT4_CRYPTO_BLOCK_SIZE);
+
+ /* Create encryption request */
+ sg_init_table(sg, 1);
+ sg_set_page(sg, ctx->workpage, PAGE_SIZE, 0);
+ ablkcipher_request_set_crypt(req, sg, sg, iname->len, iv);
+ res = crypto_ablkcipher_decrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+ if (res >= 0) {
+ /* Copy the result to output */
+ memcpy(oname->name, workbuf, iname->len);
+ res = iname->len;
+ }
+ kunmap(ctx->workpage);
+ ablkcipher_request_free(req);
+ if (res < 0) {
+ printk_ratelimited(
+ KERN_ERR "%s: Error in ext4_fname_encrypt (error code %d)\n",
+ __func__, res);
+ return res;
+ }
+
+ oname->len = strnlen(oname->name, iname->len);
+ return oname->len;
+}
+
+/**
+ * ext4_fname_encode_digest() -
+ *
+ * Encodes the input digest using characters from the set [a-zA-Z0-9_+].
+ * The encoded string is roughly 4/3 times the size of the input string.
+ */
+int ext4_fname_encode_digest(char *dst, char *src, u32 len)
+{
+ static const char *lookup_table =
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_+";
+ u32 current_chunk, num_chunks, i;
+ char tmp_buf[3];
+ u32 c0, c1, c2, c3;
+
+ current_chunk = 0;
+ num_chunks = len/3;
+ for (i = 0; i < num_chunks; i++) {
+ c0 = src[3*i] & 0x3f;
+ c1 = (((src[3*i]>>6)&0x3) | ((src[3*i+1] & 0xf)<<2)) & 0x3f;
+ c2 = (((src[3*i+1]>>4)&0xf) | ((src[3*i+2] & 0x3)<<4)) & 0x3f;
+ c3 = (src[3*i+2]>>2) & 0x3f;
+ dst[4*i] = lookup_table[c0];
+ dst[4*i+1] = lookup_table[c1];
+ dst[4*i+2] = lookup_table[c2];
+ dst[4*i+3] = lookup_table[c3];
+ }
+ if (i*3 < len) {
+ memset(tmp_buf, 0, 3);
+ memcpy(tmp_buf, &src[3*i], len-3*i);
+ c0 = tmp_buf[0] & 0x3f;
+ c1 = (((tmp_buf[0]>>6)&0x3) | ((tmp_buf[1] & 0xf)<<2)) & 0x3f;
+ c2 = (((tmp_buf[1]>>4)&0xf) | ((tmp_buf[2] & 0x3)<<4)) & 0x3f;
+ c3 = (tmp_buf[2]>>2) & 0x3f;
+ dst[4*i] = lookup_table[c0];
+ dst[4*i+1] = lookup_table[c1];
+ dst[4*i+2] = lookup_table[c2];
+ dst[4*i+3] = lookup_table[c3];
+ i++;
+ }
+ return (i * 4);
+}
+
+/**
+ * ext4_fname_hash() -
+ *
+ * This function computes the hash of the input filename, and sets the output
+ * buffer to the *encoded* digest. It returns the length of the digest as its
+ * return value. Errors are returned as negative numbers. We trust the caller
+ * to allocate sufficient memory to oname string.
+ */
+static int ext4_fname_hash(struct ext4_fname_crypto_ctx *ctx,
+ const struct ext4_str *iname,
+ struct ext4_str *oname)
+{
+ struct scatterlist sg;
+ struct hash_desc desc = {
+ .tfm = (struct crypto_hash *)ctx->htfm,
+ .flags = CRYPTO_TFM_REQ_MAY_SLEEP
+ };
+ int res = 0;
+
+ if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) {
+ res = ext4_fname_encode_digest(oname->name, iname->name,
+ iname->len);
+ oname->len = res;
+ return res;
+ }
+
+ sg_init_one(&sg, iname->name, iname->len);
+ res = crypto_hash_init(&desc);
+ if (res) {
+ printk(KERN_ERR
+ "%s: Error initializing crypto hash; res = [%d]\n",
+ __func__, res);
+ goto out;
+ }
+ res = crypto_hash_update(&desc, &sg, iname->len);
+ if (res) {
+ printk(KERN_ERR
+ "%s: Error updating crypto hash; res = [%d]\n",
+ __func__, res);
+ goto out;
+ }
+ res = crypto_hash_final(&desc,
+ &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE]);
+ if (res) {
+ printk(KERN_ERR
+ "%s: Error finalizing crypto hash; res = [%d]\n",
+ __func__, res);
+ goto out;
+ }
+ /* Encode the digest as a printable string--this will increase the
+ * size of the digest */
+ oname->name[0] = 'I';
+ res = ext4_fname_encode_digest(oname->name+1,
+ &oname->name[EXT4_FNAME_CRYPTO_DIGEST_SIZE],
+ EXT4_FNAME_CRYPTO_DIGEST_SIZE) + 1;
+ oname->len = res;
+out:
+ return res;
+}
+
+/**
+ * ext4_free_fname_crypto_ctx() -
+ *
+ * Frees up a crypto context.
+ */
+void ext4_free_fname_crypto_ctx(struct ext4_fname_crypto_ctx *ctx)
+{
+ if (ctx == NULL || IS_ERR(ctx))
+ return;
+
+ if (ctx->ctfm && !IS_ERR(ctx->ctfm))
+ crypto_free_ablkcipher(ctx->ctfm);
+ if (ctx->htfm && !IS_ERR(ctx->htfm))
+ crypto_free_hash(ctx->htfm);
+ if (ctx->workpage && !IS_ERR(ctx->workpage))
+ __free_page(ctx->workpage);
+ kfree(ctx);
+}
+
+/**
+ * ext4_put_fname_crypto_ctx() -
+ *
+ * Return: The crypto context onto free list. If the free list is above a
+ * threshold, completely frees up the context, and returns the memory.
+ *
+ * TODO: Currently we directly free the crypto context. Eventually we should
+ * add code it to return to free list. Such an approach will increase
+ * efficiency of directory lookup.
+ */
+void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx)
+{
+ if (*ctx == NULL || IS_ERR(*ctx))
+ return;
+ ext4_free_fname_crypto_ctx(*ctx);
+ *ctx = NULL;
+}
+
+/**
+ * ext4_search_fname_crypto_ctx() -
+ */
+static struct ext4_fname_crypto_ctx *ext4_search_fname_crypto_ctx(
+ const struct ext4_encryption_key *key)
+{
+ return NULL;
+}
+
+/**
+ * ext4_alloc_fname_crypto_ctx() -
+ */
+struct ext4_fname_crypto_ctx *ext4_alloc_fname_crypto_ctx(
+ const struct ext4_encryption_key *key)
+{
+ struct ext4_fname_crypto_ctx *ctx;
+
+ ctx = kmalloc(sizeof(struct ext4_fname_crypto_ctx), GFP_NOFS);
+ if (ctx == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (key->mode == EXT4_ENCRYPTION_MODE_INVALID) {
+ /* This will automatically set key mode to invalid
+ * As enum for ENCRYPTION_MODE_INVALID is zero */
+ memset(&ctx->key, 0, sizeof(ctx->key));
+ } else {
+ memcpy(&ctx->key, key, sizeof(struct ext4_encryption_key));
+ }
+ ctx->has_valid_key = (EXT4_ENCRYPTION_MODE_INVALID == key->mode)
+ ? 0 : 1;
+ ctx->ctfm_key_is_ready = 0;
+ ctx->ctfm = NULL;
+ ctx->htfm = NULL;
+ ctx->workpage = NULL;
+ return ctx;
+}
+
+/**
+ * ext4_get_fname_crypto_ctx() -
+ *
+ * Allocates a free crypto context and initializes it to hold
+ * the crypto material for the inode.
+ *
+ * Return: NULL if not encrypted. Error value on error. Valid pointer otherwise.
+ */
+struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(
+ struct inode *inode, u32 max_ciphertext_len)
+{
+ struct ext4_fname_crypto_ctx *ctx;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int res;
+
+ /* Check if the crypto policy is set on the inode */
+ res = ext4_encrypted_inode(inode);
+ if (res == 0)
+ return NULL;
+
+ if (!ext4_has_encryption_key(inode))
+ ext4_generate_encryption_key(inode);
+
+ /* Get a crypto context based on the key.
+ * A new context is allocated if no context matches the requested key.
+ */
+ ctx = ext4_search_fname_crypto_ctx(&(ei->i_encryption_key));
+ if (ctx == NULL)
+ ctx = ext4_alloc_fname_crypto_ctx(&(ei->i_encryption_key));
+ if (IS_ERR(ctx))
+ return ctx;
+
+ if (ctx->has_valid_key) {
+ if (ctx->key.mode != EXT4_ENCRYPTION_MODE_AES_256_CTS) {
+ printk_once(KERN_WARNING
+ "ext4: unsupported key mode %d\n",
+ ctx->key.mode);
+ return ERR_PTR(-ENOKEY);
+ }
+
+ /* As a first cut, we will allocate new tfm in every call.
+ * later, we will keep the tfm around, in case the key gets
+ * re-used */
+ if (ctx->ctfm == NULL) {
+ ctx->ctfm = crypto_alloc_ablkcipher("cts(cbc(aes))",
+ 0, 0);
+ }
+ if (IS_ERR(ctx->ctfm)) {
+ res = PTR_ERR(ctx->ctfm);
+ printk(
+ KERN_DEBUG "%s: error (%d) allocating crypto tfm\n",
+ __func__, res);
+ ctx->ctfm = NULL;
+ ext4_put_fname_crypto_ctx(&ctx);
+ return ERR_PTR(res);
+ }
+ if (ctx->ctfm == NULL) {
+ printk(
+ KERN_DEBUG "%s: could not allocate crypto tfm\n",
+ __func__);
+ ext4_put_fname_crypto_ctx(&ctx);
+ return ERR_PTR(-ENOMEM);
+ }
+ if (ctx->workpage == NULL)
+ ctx->workpage = alloc_page(GFP_NOFS);
+ if (IS_ERR(ctx->workpage)) {
+ res = PTR_ERR(ctx->workpage);
+ printk(
+ KERN_DEBUG "%s: error (%d) allocating work page\n",
+ __func__, res);
+ ctx->workpage = NULL;
+ ext4_put_fname_crypto_ctx(&ctx);
+ return ERR_PTR(res);
+ }
+ if (ctx->workpage == NULL) {
+ printk(
+ KERN_DEBUG "%s: could not allocate work page\n",
+ __func__);
+ ext4_put_fname_crypto_ctx(&ctx);
+ return ERR_PTR(-ENOMEM);
+ }
+ ctx->lim = max_ciphertext_len;
+ crypto_ablkcipher_clear_flags(ctx->ctfm, ~0);
+ crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctx->ctfm),
+ CRYPTO_TFM_REQ_WEAK_KEY);
+
+ /* If we are lucky, we will get a context that is already
+ * set up with the right key. Else, we will have to
+ * set the key */
+ if (!ctx->ctfm_key_is_ready) {
+ /* Since our crypto objectives for filename encryption
+ * are pretty weak,
+ * we directly use the inode master key */
+ res = crypto_ablkcipher_setkey(ctx->ctfm,
+ ctx->key.raw, ctx->key.size);
+ if (res) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ return ERR_PTR(-EIO);
+ }
+ ctx->ctfm_key_is_ready = 1;
+ } else {
+ /* In the current implementation, key should never be
+ * marked "ready" for a context that has just been
+ * allocated. So we should never reach here */
+ BUG();
+ }
+ }
+ if (ctx->htfm == NULL)
+ ctx->htfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(ctx->htfm)) {
+ res = PTR_ERR(ctx->htfm);
+ printk(KERN_DEBUG "%s: error (%d) allocating hash tfm\n",
+ __func__, res);
+ ctx->htfm = NULL;
+ ext4_put_fname_crypto_ctx(&ctx);
+ return ERR_PTR(res);
+ }
+ if (ctx->htfm == NULL) {
+ printk(KERN_DEBUG "%s: could not allocate hash tfm\n",
+ __func__);
+ ext4_put_fname_crypto_ctx(&ctx);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return ctx;
+}
+
+/**
+ * ext4_fname_crypto_round_up() -
+ *
+ * Return: The next multiple of block size
+ */
+u32 ext4_fname_crypto_round_up(u32 size, u32 blksize)
+{
+ return ((size+blksize-1)/blksize)*blksize;
+}
+
+/**
+ * ext4_fname_crypto_namelen_on_disk() -
+ */
+int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
+ u32 namelen)
+{
+ u32 ciphertext_len;
+
+ if (ctx == NULL)
+ return -EIO;
+ if (!(ctx->has_valid_key))
+ return -EACCES;
+ ciphertext_len = (namelen < EXT4_CRYPTO_BLOCK_SIZE) ?
+ EXT4_CRYPTO_BLOCK_SIZE : namelen;
+ ciphertext_len = (ciphertext_len > ctx->lim)
+ ? ctx->lim : ciphertext_len;
+ return (int) ciphertext_len;
+}
+
+/**
+ * ext4_fname_crypto_alloc_obuff() -
+ *
+ * Allocates an output buffer that is sufficient for the crypto operation
+ * specified by the context and the direction.
+ */
+int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
+ u32 ilen, struct ext4_str *crypto_str)
+{
+ unsigned int olen;
+
+ if (!ctx)
+ return -EIO;
+ olen = ext4_fname_crypto_round_up(ilen, EXT4_CRYPTO_BLOCK_SIZE);
+ crypto_str->len = olen;
+ if (olen < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2)
+ olen = EXT4_FNAME_CRYPTO_DIGEST_SIZE*2;
+ /* Allocated buffer can hold one more character to null-terminate the
+ * string */
+ crypto_str->name = kmalloc(olen+1, GFP_NOFS);
+ if (!(crypto_str->name))
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * ext4_fname_crypto_free_buffer() -
+ *
+ * Frees the buffer allocated for crypto operation.
+ */
+void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str)
+{
+ if (!crypto_str)
+ return;
+ kfree(crypto_str->name);
+ crypto_str->name = NULL;
+}
+
+/**
+ * ext4_fname_disk_to_usr() - converts a filename from disk space to user space
+ */
+int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+ const struct ext4_str *iname,
+ struct ext4_str *oname)
+{
+ if (ctx == NULL)
+ return -EIO;
+ if (iname->len < 3) {
+ /*Check for . and .. */
+ if (iname->name[0] == '.' && iname->name[iname->len-1] == '.') {
+ oname->name[0] = '.';
+ oname->name[iname->len-1] = '.';
+ oname->len = iname->len;
+ return oname->len;
+ }
+ }
+ if (ctx->has_valid_key)
+ return ext4_fname_decrypt(ctx, iname, oname);
+ else
+ return ext4_fname_hash(ctx, iname, oname);
+}
+
+int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+ const struct ext4_dir_entry_2 *de,
+ struct ext4_str *oname)
+{
+ struct ext4_str iname = {.name = (unsigned char *) de->name,
+ .len = de->name_len };
+
+ return _ext4_fname_disk_to_usr(ctx, &iname, oname);
+}
+
+
+/**
+ * ext4_fname_usr_to_disk() - converts a filename from user space to disk space
+ */
+int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
+ const struct qstr *iname,
+ struct ext4_str *oname)
+{
+ int res;
+
+ if (ctx == NULL)
+ return -EIO;
+ if (iname->len < 3) {
+ /*Check for . and .. */
+ if (iname->name[0] == '.' &&
+ iname->name[iname->len-1] == '.') {
+ oname->name[0] = '.';
+ oname->name[iname->len-1] = '.';
+ oname->len = iname->len;
+ return oname->len;
+ }
+ }
+ if (ctx->has_valid_key) {
+ res = ext4_fname_encrypt(ctx, iname, oname);
+ return res;
+ }
+ /* Without a proper key, a user is not allowed to modify the filenames
+ * in a directory. Consequently, a user space name cannot be mapped to
+ * a disk-space name */
+ return -EACCES;
+}
+
+/*
+ * Calculate the htree hash from a filename from user space
+ */
+int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
+ const struct qstr *iname,
+ struct dx_hash_info *hinfo)
+{
+ struct ext4_str tmp, tmp2;
+ int ret = 0;
+
+ if (!ctx || !ctx->has_valid_key ||
+ ((iname->name[0] == '.') &&
+ ((iname->len == 1) ||
+ ((iname->name[1] == '.') && (iname->len == 2))))) {
+ ext4fs_dirhash(iname->name, iname->len, hinfo);
+ return 0;
+ }
+
+ /* First encrypt the plaintext name */
+ ret = ext4_fname_crypto_alloc_buffer(ctx, iname->len, &tmp);
+ if (ret < 0)
+ return ret;
+
+ ret = ext4_fname_encrypt(ctx, iname, &tmp);
+ if (ret < 0)
+ goto out;
+
+ tmp2.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
+ tmp2.name = kmalloc(tmp2.len + 1, GFP_KERNEL);
+ if (tmp2.name == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = ext4_fname_hash(ctx, &tmp, &tmp2);
+ if (ret > 0)
+ ext4fs_dirhash(tmp2.name, tmp2.len, hinfo);
+ ext4_fname_crypto_free_buffer(&tmp2);
+out:
+ ext4_fname_crypto_free_buffer(&tmp);
+ return ret;
+}
+
+/**
+ * ext4_fname_disk_to_htree() - converts a filename from disk space to htree-access string
+ */
+int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
+ const struct ext4_dir_entry_2 *de,
+ struct dx_hash_info *hinfo)
+{
+ struct ext4_str iname = {.name = (unsigned char *) de->name,
+ .len = de->name_len};
+ struct ext4_str tmp;
+ int ret;
+
+ if (!ctx ||
+ ((iname.name[0] == '.') &&
+ ((iname.len == 1) ||
+ ((iname.name[1] == '.') && (iname.len == 2))))) {
+ ext4fs_dirhash(iname.name, iname.len, hinfo);
+ return 0;
+ }
+
+ tmp.len = (4 * ((EXT4_FNAME_CRYPTO_DIGEST_SIZE + 2) / 3)) + 1;
+ tmp.name = kmalloc(tmp.len + 1, GFP_KERNEL);
+ if (tmp.name == NULL)
+ return -ENOMEM;
+
+ ret = ext4_fname_hash(ctx, &iname, &tmp);
+ if (ret > 0)
+ ext4fs_dirhash(tmp.name, tmp.len, hinfo);
+ ext4_fname_crypto_free_buffer(&tmp);
+ return ret;
+}
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
new file mode 100644
index 000000000000..c8392af8abbb
--- /dev/null
+++ b/fs/ext4/crypto_key.c
@@ -0,0 +1,165 @@
+/*
+ * linux/fs/ext4/crypto_key.c
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains encryption key functions for ext4
+ *
+ * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
+ */
+
+#include <keys/encrypted-type.h>
+#include <keys/user-type.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <uapi/linux/keyctl.h>
+
+#include "ext4.h"
+#include "xattr.h"
+
+static void derive_crypt_complete(struct crypto_async_request *req, int rc)
+{
+ struct ext4_completion_result *ecr = req->data;
+
+ if (rc == -EINPROGRESS)
+ return;
+
+ ecr->res = rc;
+ complete(&ecr->completion);
+}
+
+/**
+ * ext4_derive_key_aes() - Derive a key using AES-128-ECB
+ * @deriving_key: Encryption key used for derivatio.
+ * @source_key: Source key to which to apply derivation.
+ * @derived_key: Derived key.
+ *
+ * Return: Zero on success; non-zero otherwise.
+ */
+static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
+ char source_key[EXT4_AES_256_XTS_KEY_SIZE],
+ char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
+{
+ int res = 0;
+ struct ablkcipher_request *req = NULL;
+ DECLARE_EXT4_COMPLETION_RESULT(ecr);
+ struct scatterlist src_sg, dst_sg;
+ struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
+ 0);
+
+ if (IS_ERR(tfm)) {
+ res = PTR_ERR(tfm);
+ tfm = NULL;
+ goto out;
+ }
+ crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ if (!req) {
+ res = -ENOMEM;
+ goto out;
+ }
+ ablkcipher_request_set_callback(req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ derive_crypt_complete, &ecr);
+ res = crypto_ablkcipher_setkey(tfm, deriving_key,
+ EXT4_AES_128_ECB_KEY_SIZE);
+ if (res < 0)
+ goto out;
+ sg_init_one(&src_sg, source_key, EXT4_AES_256_XTS_KEY_SIZE);
+ sg_init_one(&dst_sg, derived_key, EXT4_AES_256_XTS_KEY_SIZE);
+ ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
+ EXT4_AES_256_XTS_KEY_SIZE, NULL);
+ res = crypto_ablkcipher_encrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+
+out:
+ if (req)
+ ablkcipher_request_free(req);
+ if (tfm)
+ crypto_free_ablkcipher(tfm);
+ return res;
+}
+
+/**
+ * ext4_generate_encryption_key() - generates an encryption key
+ * @inode: The inode to generate the encryption key for.
+ */
+int ext4_generate_encryption_key(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
+ char full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
+ (EXT4_KEY_DESCRIPTOR_SIZE * 2) + 1];
+ struct key *keyring_key = NULL;
+ struct ext4_encryption_key *master_key;
+ struct ext4_encryption_context ctx;
+ struct user_key_payload *ukp;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &ctx, sizeof(ctx));
+
+ if (res != sizeof(ctx)) {
+ if (res > 0)
+ res = -EINVAL;
+ goto out;
+ }
+ res = 0;
+
+ if (S_ISREG(inode->i_mode))
+ crypt_key->mode = ctx.contents_encryption_mode;
+ else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ crypt_key->mode = ctx.filenames_encryption_mode;
+ else {
+ printk(KERN_ERR "ext4 crypto: Unsupported inode type.\n");
+ BUG();
+ }
+ crypt_key->size = ext4_encryption_key_size(crypt_key->mode);
+ BUG_ON(!crypt_key->size);
+ if (DUMMY_ENCRYPTION_ENABLED(sbi)) {
+ memset(crypt_key->raw, 0x42, EXT4_AES_256_XTS_KEY_SIZE);
+ goto out;
+ }
+ memcpy(full_key_descriptor, EXT4_KEY_DESC_PREFIX,
+ EXT4_KEY_DESC_PREFIX_SIZE);
+ sprintf(full_key_descriptor + EXT4_KEY_DESC_PREFIX_SIZE,
+ "%*phN", EXT4_KEY_DESCRIPTOR_SIZE,
+ ctx.master_key_descriptor);
+ full_key_descriptor[EXT4_KEY_DESC_PREFIX_SIZE +
+ (2 * EXT4_KEY_DESCRIPTOR_SIZE)] = '\0';
+ keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
+ if (IS_ERR(keyring_key)) {
+ res = PTR_ERR(keyring_key);
+ keyring_key = NULL;
+ goto out;
+ }
+ BUG_ON(keyring_key->type != &key_type_logon);
+ ukp = ((struct user_key_payload *)keyring_key->payload.data);
+ if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
+ res = -EINVAL;
+ goto out;
+ }
+ master_key = (struct ext4_encryption_key *)ukp->data;
+ BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE !=
+ EXT4_KEY_DERIVATION_NONCE_SIZE);
+ BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
+ res = ext4_derive_key_aes(ctx.nonce, master_key->raw, crypt_key->raw);
+out:
+ if (keyring_key)
+ key_put(keyring_key);
+ if (res < 0)
+ crypt_key->mode = EXT4_ENCRYPTION_MODE_INVALID;
+ return res;
+}
+
+int ext4_has_encryption_key(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_encryption_key *crypt_key = &ei->i_encryption_key;
+
+ return (crypt_key->mode != EXT4_ENCRYPTION_MODE_INVALID);
+}
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
new file mode 100644
index 000000000000..30eaf9e9864a
--- /dev/null
+++ b/fs/ext4/crypto_policy.c
@@ -0,0 +1,194 @@
+/*
+ * linux/fs/ext4/crypto_policy.c
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains encryption policy functions for ext4
+ *
+ * Written by Michael Halcrow, 2015.
+ */
+
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "ext4.h"
+#include "xattr.h"
+
+static int ext4_inode_has_encryption_context(struct inode *inode)
+{
+ int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0);
+ return (res > 0);
+}
+
+/*
+ * check whether the policy is consistent with the encryption context
+ * for the inode
+ */
+static int ext4_is_encryption_context_consistent_with_policy(
+ struct inode *inode, const struct ext4_encryption_policy *policy)
+{
+ struct ext4_encryption_context ctx;
+ int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
+ sizeof(ctx));
+ if (res != sizeof(ctx))
+ return 0;
+ return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
+ EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (ctx.contents_encryption_mode ==
+ policy->contents_encryption_mode) &&
+ (ctx.filenames_encryption_mode ==
+ policy->filenames_encryption_mode));
+}
+
+static int ext4_create_encryption_context_from_policy(
+ struct inode *inode, const struct ext4_encryption_policy *policy)
+{
+ struct ext4_encryption_context ctx;
+ int res = 0;
+
+ ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
+ memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
+ EXT4_KEY_DESCRIPTOR_SIZE);
+ if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) {
+ printk(KERN_WARNING
+ "%s: Invalid contents encryption mode %d\n", __func__,
+ policy->contents_encryption_mode);
+ res = -EINVAL;
+ goto out;
+ }
+ if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
+ printk(KERN_WARNING
+ "%s: Invalid filenames encryption mode %d\n", __func__,
+ policy->filenames_encryption_mode);
+ res = -EINVAL;
+ goto out;
+ }
+ ctx.contents_encryption_mode = policy->contents_encryption_mode;
+ ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
+ BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
+ get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
+
+ res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
+ sizeof(ctx), 0);
+out:
+ if (!res)
+ ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+ return res;
+}
+
+int ext4_process_policy(const struct ext4_encryption_policy *policy,
+ struct inode *inode)
+{
+ if (policy->version != 0)
+ return -EINVAL;
+
+ if (!ext4_inode_has_encryption_context(inode)) {
+ if (!ext4_empty_dir(inode))
+ return -ENOTEMPTY;
+ return ext4_create_encryption_context_from_policy(inode,
+ policy);
+ }
+
+ if (ext4_is_encryption_context_consistent_with_policy(inode, policy))
+ return 0;
+
+ printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
+ __func__);
+ return -EINVAL;
+}
+
+int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
+{
+ struct ext4_encryption_context ctx;
+
+ int res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &ctx, sizeof(ctx));
+ if (res != sizeof(ctx))
+ return -ENOENT;
+ if (ctx.format != EXT4_ENCRYPTION_CONTEXT_FORMAT_V1)
+ return -EINVAL;
+ policy->version = 0;
+ policy->contents_encryption_mode = ctx.contents_encryption_mode;
+ policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
+ memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
+ EXT4_KEY_DESCRIPTOR_SIZE);
+ return 0;
+}
+
+int ext4_is_child_context_consistent_with_parent(struct inode *parent,
+ struct inode *child)
+{
+ struct ext4_encryption_context parent_ctx, child_ctx;
+ int res;
+
+ if ((parent == NULL) || (child == NULL)) {
+ pr_err("parent %p child %p\n", parent, child);
+ BUG_ON(1);
+ }
+ /* no restrictions if the parent directory is not encrypted */
+ if (!ext4_encrypted_inode(parent))
+ return 1;
+ res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &parent_ctx, sizeof(parent_ctx));
+ if (res != sizeof(parent_ctx))
+ return 0;
+ /* if the child directory is not encrypted, this is always a problem */
+ if (!ext4_encrypted_inode(child))
+ return 0;
+ res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &child_ctx, sizeof(child_ctx));
+ if (res != sizeof(child_ctx))
+ return 0;
+ return (memcmp(parent_ctx.master_key_descriptor,
+ child_ctx.master_key_descriptor,
+ EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (parent_ctx.contents_encryption_mode ==
+ child_ctx.contents_encryption_mode) &&
+ (parent_ctx.filenames_encryption_mode ==
+ child_ctx.filenames_encryption_mode));
+}
+
+/**
+ * ext4_inherit_context() - Sets a child context from its parent
+ * @parent: Parent inode from which the context is inherited.
+ * @child: Child inode that inherits the context from @parent.
+ *
+ * Return: Zero on success, non-zero otherwise
+ */
+int ext4_inherit_context(struct inode *parent, struct inode *child)
+{
+ struct ext4_encryption_context ctx;
+ int res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &ctx, sizeof(ctx));
+
+ if (res != sizeof(ctx)) {
+ if (DUMMY_ENCRYPTION_ENABLED(EXT4_SB(parent->i_sb))) {
+ ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
+ ctx.contents_encryption_mode =
+ EXT4_ENCRYPTION_MODE_AES_256_XTS;
+ ctx.filenames_encryption_mode =
+ EXT4_ENCRYPTION_MODE_AES_256_CTS;
+ memset(ctx.master_key_descriptor, 0x42,
+ EXT4_KEY_DESCRIPTOR_SIZE);
+ res = 0;
+ } else {
+ goto out;
+ }
+ }
+ get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
+ res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
+ sizeof(ctx), 0);
+out:
+ if (!res)
+ ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT);
+ return res;
+}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c24143ea9c08..61db51a5ce4c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -22,10 +22,8 @@
*/
#include <linux/fs.h>
-#include <linux/jbd2.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
-#include <linux/rbtree.h>
#include "ext4.h"
#include "xattr.h"
@@ -110,7 +108,10 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
int err;
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
+ struct buffer_head *bh = NULL;
int dir_has_error = 0;
+ struct ext4_fname_crypto_ctx *enc_ctx = NULL;
+ struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
if (is_dx_dir(inode)) {
err = ext4_dx_readdir(file, ctx);
@@ -127,17 +128,28 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
if (ext4_has_inline_data(inode)) {
int has_inline_data = 1;
- int ret = ext4_read_inline_dir(file, ctx,
+ err = ext4_read_inline_dir(file, ctx,
&has_inline_data);
if (has_inline_data)
- return ret;
+ return err;
+ }
+
+ enc_ctx = ext4_get_fname_crypto_ctx(inode, EXT4_NAME_LEN);
+ if (IS_ERR(enc_ctx))
+ return PTR_ERR(enc_ctx);
+ if (enc_ctx) {
+ err = ext4_fname_crypto_alloc_buffer(enc_ctx, EXT4_NAME_LEN,
+ &fname_crypto_str);
+ if (err < 0) {
+ ext4_put_fname_crypto_ctx(&enc_ctx);
+ return err;
+ }
}
offset = ctx->pos & (sb->s_blocksize - 1);
while (ctx->pos < inode->i_size) {
struct ext4_map_blocks map;
- struct buffer_head *bh = NULL;
map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
map.m_len = 1;
@@ -180,6 +192,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
(unsigned long long)ctx->pos);
ctx->pos += sb->s_blocksize - offset;
brelse(bh);
+ bh = NULL;
continue;
}
set_buffer_verified(bh);
@@ -226,25 +239,44 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
offset += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize);
if (le32_to_cpu(de->inode)) {
- if (!dir_emit(ctx, de->name,
- de->name_len,
- le32_to_cpu(de->inode),
- get_dtype(sb, de->file_type))) {
- brelse(bh);
- return 0;
+ if (enc_ctx == NULL) {
+ /* Directory is not encrypted */
+ if (!dir_emit(ctx, de->name,
+ de->name_len,
+ le32_to_cpu(de->inode),
+ get_dtype(sb, de->file_type)))
+ goto done;
+ } else {
+ /* Directory is encrypted */
+ err = ext4_fname_disk_to_usr(enc_ctx,
+ de, &fname_crypto_str);
+ if (err < 0)
+ goto errout;
+ if (!dir_emit(ctx,
+ fname_crypto_str.name, err,
+ le32_to_cpu(de->inode),
+ get_dtype(sb, de->file_type)))
+ goto done;
}
}
ctx->pos += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize);
}
- offset = 0;
+ if ((ctx->pos < inode->i_size) && !dir_relax(inode))
+ goto done;
brelse(bh);
- if (ctx->pos < inode->i_size) {
- if (!dir_relax(inode))
- return 0;
- }
+ bh = NULL;
+ offset = 0;
}
- return 0;
+done:
+ err = 0;
+errout:
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ ext4_put_fname_crypto_ctx(&enc_ctx);
+ ext4_fname_crypto_free_buffer(&fname_crypto_str);
+#endif
+ brelse(bh);
+ return err;
}
static inline int is_32bit_api(void)
@@ -384,10 +416,15 @@ void ext4_htree_free_dir_info(struct dir_private_info *p)
/*
* Given a directory entry, enter it into the fname rb tree.
+ *
+ * When filename encryption is enabled, the dirent will hold the
+ * encrypted filename, while the htree will hold decrypted filename.
+ * The decrypted filename is passed in via ent_name. parameter.
*/
int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
__u32 minor_hash,
- struct ext4_dir_entry_2 *dirent)
+ struct ext4_dir_entry_2 *dirent,
+ struct ext4_str *ent_name)
{
struct rb_node **p, *parent = NULL;
struct fname *fname, *new_fn;
@@ -398,17 +435,17 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
p = &info->root.rb_node;
/* Create and allocate the fname structure */
- len = sizeof(struct fname) + dirent->name_len + 1;
+ len = sizeof(struct fname) + ent_name->len + 1;
new_fn = kzalloc(len, GFP_KERNEL);
if (!new_fn)
return -ENOMEM;
new_fn->hash = hash;
new_fn->minor_hash = minor_hash;
new_fn->inode = le32_to_cpu(dirent->inode);
- new_fn->name_len = dirent->name_len;
+ new_fn->name_len = ent_name->len;
new_fn->file_type = dirent->file_type;
- memcpy(new_fn->name, dirent->name, dirent->name_len);
- new_fn->name[dirent->name_len] = 0;
+ memcpy(new_fn->name, ent_name->name, ent_name->len);
+ new_fn->name[ent_name->len] = 0;
while (*p) {
parent = *p;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f63c3d5805c4..ef267adce19a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -422,7 +422,7 @@ enum {
EXT4_INODE_DIRTY = 8,
EXT4_INODE_COMPRBLK = 9, /* One or more compressed clusters */
EXT4_INODE_NOCOMPR = 10, /* Don't compress */
- EXT4_INODE_ENCRYPT = 11, /* Compression error */
+ EXT4_INODE_ENCRYPT = 11, /* Encrypted file */
/* End compression flags --- maybe not all used */
EXT4_INODE_INDEX = 12, /* hash-indexed directory */
EXT4_INODE_IMAGIC = 13, /* AFS directory */
@@ -582,6 +582,15 @@ enum {
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
+/* Encryption algorithms */
+#define EXT4_ENCRYPTION_MODE_INVALID 0
+#define EXT4_ENCRYPTION_MODE_AES_256_XTS 1
+#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
+#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
+#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4
+
+#include "ext4_crypto.h"
+
/*
* ioctl commands
*/
@@ -603,6 +612,9 @@ enum {
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
+#define EXT4_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct ext4_encryption_policy)
+#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
+#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
@@ -939,6 +951,11 @@ struct ext4_inode_info {
/* Precomputed uuid+inum+igen checksum for seeding inode checksums */
__u32 i_csum_seed;
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ /* Encryption params */
+ struct ext4_encryption_key i_encryption_key;
+#endif
};
/*
@@ -1142,7 +1159,8 @@ struct ext4_super_block {
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
__u8 s_checksum_type; /* metadata checksum algorithm used */
- __le16 s_reserved_pad;
+ __u8 s_encryption_level; /* versioning level for encryption */
+ __u8 s_reserved_pad; /* Padding to next 32bits */
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */
__le32 s_snapshot_inum; /* Inode number of active snapshot */
__le32 s_snapshot_id; /* sequential ID of active snapshot */
@@ -1169,7 +1187,9 @@ struct ext4_super_block {
__le32 s_overhead_clusters; /* overhead blocks/clusters in fs */
__le32 s_backup_bgs[2]; /* groups with sparse_super2 SBs */
__u8 s_encrypt_algos[4]; /* Encryption algorithms in use */
- __le32 s_reserved[105]; /* Padding to the end of the block */
+ __u8 s_encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
+ __le32 s_lpf_ino; /* Location of the lost+found inode */
+ __le32 s_reserved[100]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};
@@ -1180,8 +1200,16 @@ struct ext4_super_block {
/*
* run-time mount flags
*/
-#define EXT4_MF_MNTDIR_SAMPLED 0x0001
-#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
+#define EXT4_MF_MNTDIR_SAMPLED 0x0001
+#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
+#define EXT4_MF_TEST_DUMMY_ENCRYPTION 0x0004
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \
+ EXT4_MF_TEST_DUMMY_ENCRYPTION))
+#else
+#define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
+#endif
/* Number of quota types we support */
#define EXT4_MAXQUOTAS 2
@@ -1351,6 +1379,12 @@ struct ext4_sb_info {
struct ratelimit_state s_err_ratelimit_state;
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ /* Encryption */
+ uint32_t s_file_encryption_mode;
+ uint32_t s_dir_encryption_mode;
+#endif
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1466,6 +1500,18 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_SB(sb) (sb)
#endif
+/*
+ * Returns true if the inode is inode is encrypted
+ */
+static inline int ext4_encrypted_inode(struct inode *inode)
+{
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT);
+#else
+ return 0;
+#endif
+}
+
#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
/*
@@ -1575,8 +1621,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
EXT4_FEATURE_INCOMPAT_EXTENTS| \
EXT4_FEATURE_INCOMPAT_64BIT| \
EXT4_FEATURE_INCOMPAT_FLEX_BG| \
- EXT4_FEATURE_INCOMPAT_MMP | \
- EXT4_FEATURE_INCOMPAT_INLINE_DATA)
+ EXT4_FEATURE_INCOMPAT_MMP | \
+ EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
+ EXT4_FEATURE_INCOMPAT_ENCRYPT)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -2001,6 +2048,99 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
+/* crypto_policy.c */
+int ext4_is_child_context_consistent_with_parent(struct inode *parent,
+ struct inode *child);
+int ext4_inherit_context(struct inode *parent, struct inode *child);
+void ext4_to_hex(char *dst, char *src, size_t src_size);
+int ext4_process_policy(const struct ext4_encryption_policy *policy,
+ struct inode *inode);
+int ext4_get_policy(struct inode *inode,
+ struct ext4_encryption_policy *policy);
+
+/* crypto.c */
+bool ext4_valid_contents_enc_mode(uint32_t mode);
+uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
+extern struct workqueue_struct *ext4_read_workqueue;
+struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode);
+void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx);
+void ext4_restore_control_page(struct page *data_page);
+struct page *ext4_encrypt(struct inode *inode,
+ struct page *plaintext_page);
+int ext4_decrypt(struct ext4_crypto_ctx *ctx, struct page *page);
+int ext4_decrypt_one(struct inode *inode, struct page *page);
+int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+int ext4_init_crypto(void);
+void ext4_exit_crypto(void);
+static inline int ext4_sb_has_crypto(struct super_block *sb)
+{
+ return EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
+}
+#else
+static inline int ext4_init_crypto(void) { return 0; }
+static inline void ext4_exit_crypto(void) { }
+static inline int ext4_sb_has_crypto(struct super_block *sb)
+{
+ return 0;
+}
+#endif
+
+/* crypto_fname.c */
+bool ext4_valid_filenames_enc_mode(uint32_t mode);
+u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
+int ext4_fname_crypto_alloc_buffer(struct ext4_fname_crypto_ctx *ctx,
+ u32 ilen, struct ext4_str *crypto_str);
+int _ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+ const struct ext4_str *iname,
+ struct ext4_str *oname);
+int ext4_fname_disk_to_usr(struct ext4_fname_crypto_ctx *ctx,
+ const struct ext4_dir_entry_2 *de,
+ struct ext4_str *oname);
+int ext4_fname_usr_to_disk(struct ext4_fname_crypto_ctx *ctx,
+ const struct qstr *iname,
+ struct ext4_str *oname);
+int ext4_fname_usr_to_hash(struct ext4_fname_crypto_ctx *ctx,
+ const struct qstr *iname,
+ struct dx_hash_info *hinfo);
+int ext4_fname_disk_to_hash(struct ext4_fname_crypto_ctx *ctx,
+ const struct ext4_dir_entry_2 *de,
+ struct dx_hash_info *hinfo);
+int ext4_fname_crypto_namelen_on_disk(struct ext4_fname_crypto_ctx *ctx,
+ u32 namelen);
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx);
+struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
+ u32 max_len);
+void ext4_fname_crypto_free_buffer(struct ext4_str *crypto_str);
+#else
+static inline
+void ext4_put_fname_crypto_ctx(struct ext4_fname_crypto_ctx **ctx) { }
+static inline
+struct ext4_fname_crypto_ctx *ext4_get_fname_crypto_ctx(struct inode *inode,
+ u32 max_len)
+{
+ return NULL;
+}
+static inline void ext4_fname_crypto_free_buffer(struct ext4_str *p) { }
+#endif
+
+
+/* crypto_key.c */
+int ext4_generate_encryption_key(struct inode *inode);
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+int ext4_has_encryption_key(struct inode *inode);
+#else
+static inline int ext4_has_encryption_key(struct inode *inode)
+{
+ return 0;
+}
+#endif
+
+
/* dir.c */
extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
struct file *,
@@ -2011,17 +2151,20 @@ extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
unlikely(__ext4_check_dir_entry(__func__, __LINE__, (dir), (filp), \
(de), (bh), (buf), (size), (offset)))
extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
- __u32 minor_hash,
- struct ext4_dir_entry_2 *dirent);
+ __u32 minor_hash,
+ struct ext4_dir_entry_2 *dirent,
+ struct ext4_str *ent_name);
extern void ext4_htree_free_dir_info(struct dir_private_info *p);
extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
struct buffer_head *bh,
void *buf, int buf_size,
const char *name, int namelen,
struct ext4_dir_entry_2 **dest_de);
-void ext4_insert_dentry(struct inode *inode,
+int ext4_insert_dentry(struct inode *dir,
+ struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
+ const struct qstr *iname,
const char *name, int namelen);
static inline void ext4_update_dx_flag(struct inode *inode)
{
@@ -2099,6 +2242,7 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
/* inode.c */
+int ext4_inode_is_fast_symlink(struct inode *inode);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock,
@@ -2152,8 +2296,8 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
-extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset);
+extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset);
extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks);
extern void ext4_ind_truncate(handle_t *, struct inode *inode);
@@ -2189,6 +2333,7 @@ extern int ext4_generic_delete_entry(handle_t *handle,
void *entry_buf,
int buf_size,
int csum_size);
+extern int ext4_empty_dir(struct inode *inode);
/* resize.c */
extern int ext4_group_add(struct super_block *sb,
@@ -2593,7 +2738,6 @@ extern const struct file_operations ext4_dir_operations;
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
-extern const struct file_operations ext4_dax_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* inline.c */
@@ -2699,6 +2843,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
}
+/* readpages.c */
+extern int ext4_mpage_readpages(struct address_space *mapping,
+ struct list_head *pages, struct page *page,
+ unsigned nr_pages);
/* symlink.c */
extern const struct inode_operations ext4_symlink_inode_operations;
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
new file mode 100644
index 000000000000..c2ba35a914b6
--- /dev/null
+++ b/fs/ext4/ext4_crypto.h
@@ -0,0 +1,147 @@
+/*
+ * linux/fs/ext4/ext4_crypto.h
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains encryption header content for ext4
+ *
+ * Written by Michael Halcrow, 2015.
+ */
+
+#ifndef _EXT4_CRYPTO_H
+#define _EXT4_CRYPTO_H
+
+#include <linux/fs.h>
+
+#define EXT4_KEY_DESCRIPTOR_SIZE 8
+
+/* Policy provided via an ioctl on the topmost directory */
+struct ext4_encryption_policy {
+ char version;
+ char contents_encryption_mode;
+ char filenames_encryption_mode;
+ char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
+} __attribute__((__packed__));
+
+#define EXT4_ENCRYPTION_CONTEXT_FORMAT_V1 1
+#define EXT4_KEY_DERIVATION_NONCE_SIZE 16
+
+/**
+ * Encryption context for inode
+ *
+ * Protector format:
+ * 1 byte: Protector format (1 = this version)
+ * 1 byte: File contents encryption mode
+ * 1 byte: File names encryption mode
+ * 1 byte: Reserved
+ * 8 bytes: Master Key descriptor
+ * 16 bytes: Encryption Key derivation nonce
+ */
+struct ext4_encryption_context {
+ char format;
+ char contents_encryption_mode;
+ char filenames_encryption_mode;
+ char reserved;
+ char master_key_descriptor[EXT4_KEY_DESCRIPTOR_SIZE];
+ char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE];
+} __attribute__((__packed__));
+
+/* Encryption parameters */
+#define EXT4_XTS_TWEAK_SIZE 16
+#define EXT4_AES_128_ECB_KEY_SIZE 16
+#define EXT4_AES_256_GCM_KEY_SIZE 32
+#define EXT4_AES_256_CBC_KEY_SIZE 32
+#define EXT4_AES_256_CTS_KEY_SIZE 32
+#define EXT4_AES_256_XTS_KEY_SIZE 64
+#define EXT4_MAX_KEY_SIZE 64
+
+#define EXT4_KEY_DESC_PREFIX "ext4:"
+#define EXT4_KEY_DESC_PREFIX_SIZE 5
+
+struct ext4_encryption_key {
+ uint32_t mode;
+ char raw[EXT4_MAX_KEY_SIZE];
+ uint32_t size;
+};
+
+#define EXT4_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
+#define EXT4_BOUNCE_PAGE_REQUIRES_FREE_ENCRYPT_FL 0x00000002
+
+struct ext4_crypto_ctx {
+ struct crypto_tfm *tfm; /* Crypto API context */
+ struct page *bounce_page; /* Ciphertext page on write path */
+ struct page *control_page; /* Original page on write path */
+ struct bio *bio; /* The bio for this context */
+ struct work_struct work; /* Work queue for read complete path */
+ struct list_head free_list; /* Free list */
+ int flags; /* Flags */
+ int mode; /* Encryption mode for tfm */
+};
+
+struct ext4_completion_result {
+ struct completion completion;
+ int res;
+};
+
+#define DECLARE_EXT4_COMPLETION_RESULT(ecr) \
+ struct ext4_completion_result ecr = { \
+ COMPLETION_INITIALIZER((ecr).completion), 0 }
+
+static inline int ext4_encryption_key_size(int mode)
+{
+ switch (mode) {
+ case EXT4_ENCRYPTION_MODE_AES_256_XTS:
+ return EXT4_AES_256_XTS_KEY_SIZE;
+ case EXT4_ENCRYPTION_MODE_AES_256_GCM:
+ return EXT4_AES_256_GCM_KEY_SIZE;
+ case EXT4_ENCRYPTION_MODE_AES_256_CBC:
+ return EXT4_AES_256_CBC_KEY_SIZE;
+ case EXT4_ENCRYPTION_MODE_AES_256_CTS:
+ return EXT4_AES_256_CTS_KEY_SIZE;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+#define EXT4_FNAME_NUM_SCATTER_ENTRIES 4
+#define EXT4_CRYPTO_BLOCK_SIZE 16
+#define EXT4_FNAME_CRYPTO_DIGEST_SIZE 32
+
+struct ext4_str {
+ unsigned char *name;
+ u32 len;
+};
+
+struct ext4_fname_crypto_ctx {
+ u32 lim;
+ char tmp_buf[EXT4_CRYPTO_BLOCK_SIZE];
+ struct crypto_ablkcipher *ctfm;
+ struct crypto_hash *htfm;
+ struct page *workpage;
+ struct ext4_encryption_key key;
+ unsigned has_valid_key : 1;
+ unsigned ctfm_key_is_ready : 1;
+};
+
+/**
+ * For encrypted symlinks, the ciphertext length is stored at the beginning
+ * of the string in little-endian format.
+ */
+struct ext4_encrypted_symlink_data {
+ __le16 len;
+ char encrypted_path[1];
+} __attribute__((__packed__));
+
+/**
+ * This function is used to calculate the disk space required to
+ * store a filename of length l in encrypted symlink format.
+ */
+static inline u32 encrypted_symlink_data_len(u32 l)
+{
+ if (l < EXT4_CRYPTO_BLOCK_SIZE)
+ l = EXT4_CRYPTO_BLOCK_SIZE;
+ return (l + sizeof(struct ext4_encrypted_symlink_data) - 1);
+}
+
+#endif /* _EXT4_CRYPTO_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bed43081720f..973816bfe4a9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1717,12 +1717,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
{
unsigned short ext1_ee_len, ext2_ee_len;
- /*
- * Make sure that both extents are initialized. We don't merge
- * unwritten extents so that we can be sure that end_io code has
- * the extent that was written properly split out and conversion to
- * initialized is trivial.
- */
if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
return 0;
@@ -3128,6 +3122,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext4_ext_pblock(ex);
+ if (ext4_encrypted_inode(inode))
+ return ext4_encrypted_zeroout(inode, ex);
+
ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
if (ret > 0)
ret = 0;
@@ -4535,19 +4532,7 @@ got_allocated_blocks:
*/
reserved_clusters = get_reserved_cluster_alloc(inode,
map->m_lblk, allocated);
- if (map_from_cluster) {
- if (reserved_clusters) {
- /*
- * We have clusters reserved for this range.
- * But since we are not doing actual allocation
- * and are simply using blocks from previously
- * allocated cluster, we should release the
- * reservation and not claim quota.
- */
- ext4_da_update_reserve_space(inode,
- reserved_clusters, 0);
- }
- } else {
+ if (!map_from_cluster) {
BUG_ON(allocated_clusters < reserved_clusters);
if (reserved_clusters < allocated_clusters) {
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -4803,12 +4788,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
else
max_blocks -= lblk;
- flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
- EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
- EXT4_EX_NOCACHE;
- if (mode & FALLOC_FL_KEEP_SIZE)
- flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
-
mutex_lock(&inode->i_mutex);
/*
@@ -4825,15 +4804,28 @@ static long ext4_zero_range(struct file *file, loff_t offset,
ret = inode_newsize_ok(inode, new_size);
if (ret)
goto out_mutex;
- /*
- * If we have a partial block after EOF we have to allocate
- * the entire block.
- */
- if (partial_end)
- max_blocks += 1;
}
+ flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+
+ /* Preallocate the range including the unaligned edges */
+ if (partial_begin || partial_end) {
+ ret = ext4_alloc_file_blocks(file,
+ round_down(offset, 1 << blkbits) >> blkbits,
+ (round_up((offset + len), 1 << blkbits) -
+ round_down(offset, 1 << blkbits)) >> blkbits,
+ new_size, flags, mode);
+ if (ret)
+ goto out_mutex;
+
+ }
+
+ /* Zero range excluding the unaligned edges */
if (max_blocks > 0) {
+ flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+ EXT4_EX_NOCACHE);
/* Now release the pages and zero block aligned part of pages*/
truncate_pagecache_range(inode, start, end - 1);
@@ -4847,19 +4839,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags, mode);
if (ret)
goto out_dio;
- /*
- * Remove entire range from the extent status tree.
- *
- * ext4_es_remove_extent(inode, lblk, max_blocks) is
- * NOT sufficient. I'm not sure why this is the case,
- * but let's be conservative and remove the extent
- * status tree for the entire inode. There should be
- * no outstanding delalloc extents thanks to the
- * filemap_write_and_wait_range() call above.
- */
- ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
- if (ret)
- goto out_dio;
}
if (!partial_begin && !partial_end)
goto out_dio;
@@ -4922,6 +4901,20 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
ext4_lblk_t lblk;
unsigned int blkbits = inode->i_blkbits;
+ /*
+ * Encrypted inodes can't handle collapse range or insert
+ * range since we would need to re-encrypt blocks with a
+ * different IV or XTS tweak (which are based on the logical
+ * block number).
+ *
+ * XXX It's not clear why zero range isn't working, but we'll
+ * leave it disabled for encrypted inodes for now. This is a
+ * bug we should fix....
+ */
+ if (ext4_encrypted_inode(inode) &&
+ (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
+ return -EOPNOTSUPP;
+
/* Return error if mode is not supported */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e04d45733976..d33d5a6852b9 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -9,12 +9,10 @@
*
* Ext4 extents status tree core functions.
*/
-#include <linux/rbtree.h>
#include <linux/list_sort.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "ext4.h"
-#include "extents_status.h"
#include <trace/events/ext4.h>
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 33a09da16c9c..0613c256c344 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -20,12 +20,11 @@
#include <linux/time.h>
#include <linux/fs.h>
-#include <linux/jbd2.h>
#include <linux/mount.h>
#include <linux/path.h>
-#include <linux/aio.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
+#include <linux/uio.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -95,11 +94,9 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(iocb->ki_filp);
struct mutex *aio_mutex = NULL;
struct blk_plug plug;
- int o_direct = io_is_direct(file);
+ int o_direct = iocb->ki_flags & IOCB_DIRECT;
int overwrite = 0;
- size_t length = iov_iter_count(from);
ssize_t ret;
- loff_t pos = iocb->ki_pos;
/*
* Unaligned direct AIO must be serialized; see comment above
@@ -108,16 +105,17 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (o_direct &&
ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
!is_sync_kiocb(iocb) &&
- (file->f_flags & O_APPEND ||
- ext4_unaligned_aio(inode, from, pos))) {
+ (iocb->ki_flags & IOCB_APPEND ||
+ ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
aio_mutex = ext4_aio_mutex(inode);
mutex_lock(aio_mutex);
ext4_unwritten_wait(inode);
}
mutex_lock(&inode->i_mutex);
- if (file->f_flags & O_APPEND)
- iocb->ki_pos = pos = i_size_read(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out;
/*
* If we have encountered a bitmap-format file, the size limit
@@ -126,22 +124,19 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- if ((pos > sbi->s_bitmap_maxbytes) ||
- (pos == sbi->s_bitmap_maxbytes && length > 0)) {
- mutex_unlock(&inode->i_mutex);
+ if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
ret = -EFBIG;
- goto errout;
+ goto out;
}
-
- if (pos + length > sbi->s_bitmap_maxbytes)
- iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
+ iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
}
iocb->private = &overwrite;
if (o_direct) {
+ size_t length = iov_iter_count(from);
+ loff_t pos = iocb->ki_pos;
blk_start_plug(&plug);
-
/* check whether we do a DIO overwrite or not */
if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
!file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
@@ -185,7 +180,12 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (o_direct)
blk_finish_plug(&plug);
-errout:
+ if (aio_mutex)
+ mutex_unlock(aio_mutex);
+ return ret;
+
+out:
+ mutex_unlock(&inode->i_mutex);
if (aio_mutex)
mutex_unlock(aio_mutex);
return ret;
@@ -206,6 +206,7 @@ static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.page_mkwrite = ext4_dax_mkwrite,
+ .pfn_mkwrite = dax_pfn_mkwrite,
};
#else
#define ext4_dax_vm_ops ext4_file_vm_ops
@@ -219,6 +220,13 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
+ struct inode *inode = file->f_mapping->host;
+
+ if (ext4_encrypted_inode(inode)) {
+ int err = ext4_generate_encryption_key(inode);
+ if (err)
+ return 0;
+ }
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
@@ -236,6 +244,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
struct vfsmount *mnt = filp->f_path.mnt;
struct path path;
char buf[64], *cp;
+ int ret;
if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
!(sb->s_flags & MS_RDONLY))) {
@@ -274,11 +283,17 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
* writing and the journal is present
*/
if (filp->f_mode & FMODE_WRITE) {
- int ret = ext4_inode_attach_jinode(inode);
+ ret = ext4_inode_attach_jinode(inode);
if (ret < 0)
return ret;
}
- return dquot_file_open(inode, filp);
+ ret = dquot_file_open(inode, filp);
+ if (!ret && ext4_encrypted_inode(inode)) {
+ ret = ext4_generate_encryption_key(inode);
+ if (ret)
+ ret = -EACCES;
+ }
+ return ret;
}
/*
@@ -607,8 +622,6 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
const struct file_operations ext4_file_operations = {
.llseek = ext4_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = ext4_file_write_iter,
.unlocked_ioctl = ext4_ioctl,
@@ -624,26 +637,6 @@ const struct file_operations ext4_file_operations = {
.fallocate = ext4_fallocate,
};
-#ifdef CONFIG_FS_DAX
-const struct file_operations ext4_dax_file_operations = {
- .llseek = ext4_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
- .read_iter = generic_file_read_iter,
- .write_iter = ext4_file_write_iter,
- .unlocked_ioctl = ext4_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext4_compat_ioctl,
-#endif
- .mmap = ext4_file_mmap,
- .open = ext4_file_open,
- .release = ext4_release_file,
- .fsync = ext4_sync_file,
- /* Splice not yet supported with DAX */
- .fallocate = ext4_fallocate,
-};
-#endif
-
const struct inode_operations ext4_file_inode_operations = {
.setattr = ext4_setattr,
.getattr = ext4_getattr,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index a8bc47f75fa0..e9d632e9aa4b 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -26,7 +26,6 @@
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>
-#include <linux/jbd2.h>
#include <linux/blkdev.h>
#include "ext4.h"
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 3d586f02883e..e026aa941fd5 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -10,7 +10,6 @@
*/
#include <linux/fs.h>
-#include <linux/jbd2.h>
#include <linux/cryptohash.h>
#include "ext4.h"
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ac644c31ca67..2cf18a2d5c72 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -14,7 +14,6 @@
#include <linux/time.h>
#include <linux/fs.h>
-#include <linux/jbd2.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
@@ -997,6 +996,12 @@ got:
ei->i_block_group = group;
ei->i_last_alloc_group = ~0;
+ /* If the directory encrypted, then we should encrypt the inode. */
+ if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
+ (ext4_encrypted_inode(dir) ||
+ DUMMY_ENCRYPTION_ENABLED(sbi)))
+ ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+
ext4_set_inode_flags(inode);
if (IS_DIRSYNC(inode))
ext4_handle_sync(handle);
@@ -1029,11 +1034,28 @@ got:
ext4_set_inode_state(inode, EXT4_STATE_NEW);
ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
-
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if ((sbi->s_file_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID) &&
+ (sbi->s_dir_encryption_mode == EXT4_ENCRYPTION_MODE_INVALID)) {
+ ei->i_inline_off = 0;
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+ EXT4_FEATURE_INCOMPAT_INLINE_DATA))
+ ext4_set_inode_state(inode,
+ EXT4_STATE_MAY_INLINE_DATA);
+ } else {
+ /* Inline data and encryption are incompatible
+ * We turn off inline data since encryption is enabled */
+ ei->i_inline_off = 1;
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb,
+ EXT4_FEATURE_INCOMPAT_INLINE_DATA))
+ ext4_clear_inode_state(inode,
+ EXT4_STATE_MAY_INLINE_DATA);
+ }
+#else
ei->i_inline_off = 0;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_INLINE_DATA))
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
-
+#endif
ret = inode;
err = dquot_alloc_inode(inode);
if (err)
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 45fe924f82bc..3580629e42d3 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,9 +20,9 @@
* (sct@redhat.com), 1993, 1998
*/
-#include <linux/aio.h>
#include "ext4_jbd2.h"
#include "truncate.h"
+#include <linux/uio.h>
#include <trace/events/ext4.h>
@@ -642,8 +642,8 @@ out:
* crashes then stale disk data _may_ be exposed inside the file. But current
* VFS code falls back into buffered path in that case so we are safe.
*/
-ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -654,7 +654,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
int retries = 0;
- if (rw == WRITE) {
+ if (iov_iter_rw(iter) == WRITE) {
loff_t final_size = offset + count;
if (final_size > inode->i_size) {
@@ -676,7 +676,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
}
retry:
- if (rw == READ && ext4_should_dioread_nolock(inode)) {
+ if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) {
/*
* Nolock dioread optimization may be dynamically disabled
* via ext4_inode_block_unlocked_dio(). Check inode's state
@@ -690,23 +690,24 @@ retry:
goto locked;
}
if (IS_DAX(inode))
- ret = dax_do_io(rw, iocb, inode, iter, offset,
+ ret = dax_do_io(iocb, inode, iter, offset,
ext4_get_block, NULL, 0);
else
- ret = __blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev, iter, offset,
- ext4_get_block, NULL, NULL, 0);
+ ret = __blockdev_direct_IO(iocb, inode,
+ inode->i_sb->s_bdev, iter,
+ offset, ext4_get_block, NULL,
+ NULL, 0);
inode_dio_done(inode);
} else {
locked:
if (IS_DAX(inode))
- ret = dax_do_io(rw, iocb, inode, iter, offset,
+ ret = dax_do_io(iocb, inode, iter, offset,
ext4_get_block, NULL, DIO_LOCKING);
else
- ret = blockdev_direct_IO(rw, iocb, inode, iter,
- offset, ext4_get_block);
+ ret = blockdev_direct_IO(iocb, inode, iter, offset,
+ ext4_get_block);
- if (unlikely((rw & WRITE) && ret < 0)) {
+ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
loff_t isize = i_size_read(inode);
loff_t end = offset + count;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 4b143febf21f..feb2cafbeace 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -11,11 +11,13 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
+
+#include <linux/fiemap.h>
+
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
#include "truncate.h"
-#include <linux/fiemap.h>
#define EXT4_XATTR_SYSTEM_DATA "data"
#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -972,7 +974,7 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
offset = 0;
while ((void *)de < dlimit) {
de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
- trace_printk("de: off %u rlen %u name %*.s nlen %u ino %u\n",
+ trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n",
offset, de_len, de->name_len, de->name,
de->name_len, le32_to_cpu(de->inode));
if (ext4_check_dir_entry(dir, NULL, de, bh,
@@ -1014,7 +1016,8 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
err = ext4_journal_get_write_access(handle, iloc->bh);
if (err)
return err;
- ext4_insert_dentry(inode, de, inline_size, name, namelen);
+ ext4_insert_dentry(dir, inode, de, inline_size, &dentry->d_name,
+ name, namelen);
ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
@@ -1327,6 +1330,7 @@ int htree_inlinedir_to_tree(struct file *dir_file,
struct ext4_iloc iloc;
void *dir_buf = NULL;
struct ext4_dir_entry_2 fake;
+ struct ext4_str tmp_str;
ret = ext4_get_inode_loc(inode, &iloc);
if (ret)
@@ -1398,8 +1402,10 @@ int htree_inlinedir_to_tree(struct file *dir_file,
continue;
if (de->inode == 0)
continue;
- err = ext4_htree_store_dirent(dir_file,
- hinfo->hash, hinfo->minor_hash, de);
+ tmp_str.name = de->name;
+ tmp_str.len = de->name_len;
+ err = ext4_htree_store_dirent(dir_file, hinfo->hash,
+ hinfo->minor_hash, de, &tmp_str);
if (err) {
count = err;
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5cb9a212b86f..366476e71e10 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -20,7 +20,6 @@
#include <linux/fs.h>
#include <linux/time.h>
-#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
@@ -36,8 +35,6 @@
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/slab.h>
-#include <linux/ratelimit.h>
-#include <linux/aio.h>
#include <linux/bitops.h>
#include "ext4_jbd2.h"
@@ -141,7 +138,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
/*
* Test whether an inode is a fast symlink.
*/
-static int ext4_inode_is_fast_symlink(struct inode *inode)
+int ext4_inode_is_fast_symlink(struct inode *inode)
{
int ea_blocks = EXT4_I(inode)->i_file_acl ?
EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
@@ -888,6 +885,95 @@ int do_journal_get_write_access(handle_t *handle,
static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
+ get_block_t *get_block)
+{
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned to = from + len;
+ struct inode *inode = page->mapping->host;
+ unsigned block_start, block_end;
+ sector_t block;
+ int err = 0;
+ unsigned blocksize = inode->i_sb->s_blocksize;
+ unsigned bbits;
+ struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+ bool decrypt = false;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(from > PAGE_CACHE_SIZE);
+ BUG_ON(to > PAGE_CACHE_SIZE);
+ BUG_ON(from > to);
+
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, blocksize, 0);
+ head = page_buffers(page);
+ bbits = ilog2(blocksize);
+ block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+
+ for (bh = head, block_start = 0; bh != head || !block_start;
+ block++, block_start = block_end, bh = bh->b_this_page) {
+ block_end = block_start + blocksize;
+ if (block_end <= from || block_start >= to) {
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ }
+ continue;
+ }
+ if (buffer_new(bh))
+ clear_buffer_new(bh);
+ if (!buffer_mapped(bh)) {
+ WARN_ON(bh->b_size != blocksize);
+ err = get_block(inode, block, bh, 1);
+ if (err)
+ break;
+ if (buffer_new(bh)) {
+ unmap_underlying_metadata(bh->b_bdev,
+ bh->b_blocknr);
+ if (PageUptodate(page)) {
+ clear_buffer_new(bh);
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ continue;
+ }
+ if (block_end > to || block_start < from)
+ zero_user_segments(page, to, block_end,
+ block_start, from);
+ continue;
+ }
+ }
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ continue;
+ }
+ if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
+ !buffer_unwritten(bh) &&
+ (block_start < from || block_end > to)) {
+ ll_rw_block(READ, 1, &bh);
+ *wait_bh++ = bh;
+ decrypt = ext4_encrypted_inode(inode) &&
+ S_ISREG(inode->i_mode);
+ }
+ }
+ /*
+ * If we issued read requests, let them complete.
+ */
+ while (wait_bh > wait) {
+ wait_on_buffer(*--wait_bh);
+ if (!buffer_uptodate(*wait_bh))
+ err = -EIO;
+ }
+ if (unlikely(err))
+ page_zero_new_buffers(page, from, to);
+ else if (decrypt)
+ err = ext4_decrypt_one(inode, page);
+ return err;
+}
+#endif
+
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -950,11 +1036,19 @@ retry_journal:
/* In case writeback began while the page was unlocked */
wait_for_stable_page(page);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (ext4_should_dioread_nolock(inode))
+ ret = ext4_block_write_begin(page, pos, len,
+ ext4_get_block_write);
+ else
+ ret = ext4_block_write_begin(page, pos, len,
+ ext4_get_block);
+#else
if (ext4_should_dioread_nolock(inode))
ret = __block_write_begin(page, pos, len, ext4_get_block_write);
else
ret = __block_write_begin(page, pos, len, ext4_get_block);
-
+#endif
if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, page_buffers(page),
from, to, NULL,
@@ -2576,7 +2670,12 @@ retry_journal:
/* In case writeback began while the page was unlocked */
wait_for_stable_page(page);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ ret = ext4_block_write_begin(page, pos, len,
+ ext4_da_get_block_prep);
+#else
ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
+#endif
if (ret < 0) {
unlock_page(page);
ext4_journal_stop(handle);
@@ -2822,7 +2921,7 @@ static int ext4_readpage(struct file *file, struct page *page)
ret = ext4_readpage_inline(inode, page);
if (ret == -EAGAIN)
- return mpage_readpage(page, ext4_get_block);
+ return ext4_mpage_readpages(page->mapping, NULL, page, 1);
return ret;
}
@@ -2837,7 +2936,7 @@ ext4_readpages(struct file *file, struct address_space *mapping,
if (ext4_has_inline_data(inode))
return 0;
- return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
+ return ext4_mpage_readpages(mapping, pages, NULL, nr_pages);
}
static void ext4_invalidatepage(struct page *page, unsigned int offset,
@@ -2953,8 +3052,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
* if the machine crashes during the write.
*
*/
-static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -2967,8 +3066,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
ext4_io_end_t *io_end = NULL;
/* Use the old path for reads and writes beyond i_size. */
- if (rw != WRITE || final_size > inode->i_size)
- return ext4_ind_direct_IO(rw, iocb, iter, offset);
+ if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
+ return ext4_ind_direct_IO(iocb, iter, offset);
BUG_ON(iocb->private == NULL);
@@ -2977,7 +3076,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
* conversion. This also disallows race between truncate() and
* overwrite DIO as i_dio_count needs to be incremented under i_mutex.
*/
- if (rw == WRITE)
+ if (iov_iter_rw(iter) == WRITE)
atomic_inc(&inode->i_dio_count);
/* If we do a overwrite dio, i_mutex locking can be released */
@@ -3034,11 +3133,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
}
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
+#endif
if (IS_DAX(inode))
- ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
+ ret = dax_do_io(iocb, inode, iter, offset, get_block_func,
ext4_end_io_dio, dio_flags);
else
- ret = __blockdev_direct_IO(rw, iocb, inode,
+ ret = __blockdev_direct_IO(iocb, inode,
inode->i_sb->s_bdev, iter, offset,
get_block_func,
ext4_end_io_dio, NULL, dio_flags);
@@ -3079,7 +3181,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
}
retake_lock:
- if (rw == WRITE)
+ if (iov_iter_rw(iter) == WRITE)
inode_dio_done(inode);
/* take i_mutex locking again if we do a ovewrite dio */
if (overwrite) {
@@ -3090,14 +3192,19 @@ retake_lock:
return ret;
}
-static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
size_t count = iov_iter_count(iter);
ssize_t ret;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+ return 0;
+#endif
+
/*
* If we are doing data journalling we don't support O_DIRECT
*/
@@ -3108,12 +3215,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
if (ext4_has_inline_data(inode))
return 0;
- trace_ext4_direct_IO_enter(inode, offset, count, rw);
+ trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- ret = ext4_ext_direct_IO(rw, iocb, iter, offset);
+ ret = ext4_ext_direct_IO(iocb, iter, offset);
else
- ret = ext4_ind_direct_IO(rw, iocb, iter, offset);
- trace_ext4_direct_IO_exit(inode, offset, count, rw, ret);
+ ret = ext4_ind_direct_IO(iocb, iter, offset);
+ trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
return ret;
}
@@ -3262,6 +3369,13 @@ static int __ext4_block_zero_page_range(handle_t *handle,
/* Uhhuh. Read error. Complain and punt. */
if (!buffer_uptodate(bh))
goto unlock;
+ if (S_ISREG(inode->i_mode) &&
+ ext4_encrypted_inode(inode)) {
+ /* We expect the key to be set. */
+ BUG_ON(!ext4_has_encryption_key(inode));
+ BUG_ON(blocksize != PAGE_CACHE_SIZE);
+ WARN_ON_ONCE(ext4_decrypt_one(inode, page));
+ }
}
if (ext4_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
@@ -4091,16 +4205,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
- if (test_opt(inode->i_sb, DAX))
- inode->i_fop = &ext4_dax_file_operations;
- else
- inode->i_fop = &ext4_file_operations;
+ inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
- if (ext4_inode_is_fast_symlink(inode)) {
+ if (ext4_inode_is_fast_symlink(inode) &&
+ !ext4_encrypted_inode(inode)) {
inode->i_op = &ext4_fast_symlink_inode_operations;
nd_terminate_link(ei->i_data, inode->i_size,
sizeof(ei->i_data) - 1);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f58a0d106726..2cb9e178d1c5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -8,12 +8,12 @@
*/
#include <linux/fs.h>
-#include <linux/jbd2.h>
#include <linux/capability.h>
#include <linux/time.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/file.h>
+#include <linux/random.h>
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -196,6 +196,16 @@ journal_err_out:
return err;
}
+static int uuid_is_zero(__u8 u[16])
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ if (u[i])
+ return 0;
+ return 1;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -615,7 +625,78 @@ resizefs_out:
}
case EXT4_IOC_PRECACHE_EXTENTS:
return ext4_ext_precache(inode);
+ case EXT4_IOC_SET_ENCRYPTION_POLICY: {
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ struct ext4_encryption_policy policy;
+ int err = 0;
+
+ if (copy_from_user(&policy,
+ (struct ext4_encryption_policy __user *)arg,
+ sizeof(policy))) {
+ err = -EFAULT;
+ goto encryption_policy_out;
+ }
+ err = ext4_process_policy(&policy, inode);
+encryption_policy_out:
+ return err;
+#else
+ return -EOPNOTSUPP;
+#endif
+ }
+ case EXT4_IOC_GET_ENCRYPTION_PWSALT: {
+ int err, err2;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ handle_t *handle;
+
+ if (!ext4_sb_has_crypto(sb))
+ return -EOPNOTSUPP;
+ if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) {
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto pwsalt_err_exit;
+ }
+ err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+ if (err)
+ goto pwsalt_err_journal;
+ generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
+ err = ext4_handle_dirty_metadata(handle, NULL,
+ sbi->s_sbh);
+ pwsalt_err_journal:
+ err2 = ext4_journal_stop(handle);
+ if (err2 && !err)
+ err = err2;
+ pwsalt_err_exit:
+ mnt_drop_write_file(filp);
+ if (err)
+ return err;
+ }
+ if (copy_to_user((void *) arg, sbi->s_es->s_encrypt_pw_salt,
+ 16))
+ return -EFAULT;
+ return 0;
+ }
+ case EXT4_IOC_GET_ENCRYPTION_POLICY: {
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ struct ext4_encryption_policy policy;
+ int err = 0;
+
+ if (!ext4_encrypted_inode(inode))
+ return -ENOENT;
+ err = ext4_get_policy(inode, &policy);
+ if (err)
+ return err;
+ if (copy_to_user((void *)arg, &policy, sizeof(policy)))
+ return -EFAULT;
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+ }
default:
return -ENOTTY;
}
@@ -680,6 +761,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case FITRIM:
case EXT4_IOC_RESIZE_FS:
case EXT4_IOC_PRECACHE_EXTENTS:
+ case EXT4_IOC_SET_ENCRYPTION_POLICY:
+ case EXT4_IOC_GET_ENCRYPTION_PWSALT:
+ case EXT4_IOC_GET_ENCRYPTION_POLICY:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28fe71a2904c..ef22cd951c0c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -26,7 +26,6 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
-#include <linux/jbd2.h>
#include <linux/time.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
@@ -254,8 +253,9 @@ static struct dx_frame *dx_probe(const struct qstr *d_name,
struct dx_hash_info *hinfo,
struct dx_frame *frame);
static void dx_release(struct dx_frame *frames);
-static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
- struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
+ unsigned blocksize, struct dx_hash_info *hinfo,
+ struct dx_map_entry map[]);
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
struct dx_map_entry *offsets, int count, unsigned blocksize);
@@ -586,8 +586,10 @@ struct stats
unsigned bcount;
};
-static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
- int size, int show_names)
+static struct stats dx_show_leaf(struct inode *dir,
+ struct dx_hash_info *hinfo,
+ struct ext4_dir_entry_2 *de,
+ int size, int show_names)
{
unsigned names = 0, space = 0;
char *base = (char *) de;
@@ -600,12 +602,80 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
{
if (show_names)
{
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ int len;
+ char *name;
+ struct ext4_str fname_crypto_str
+ = {.name = NULL, .len = 0};
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ int res;
+
+ name = de->name;
+ len = de->name_len;
+ ctx = ext4_get_fname_crypto_ctx(dir,
+ EXT4_NAME_LEN);
+ if (IS_ERR(ctx)) {
+ printk(KERN_WARNING "Error acquiring"
+ " crypto ctxt--skipping crypto\n");
+ ctx = NULL;
+ }
+ if (ctx == NULL) {
+ /* Directory is not encrypted */
+ ext4fs_dirhash(de->name,
+ de->name_len, &h);
+ printk("%*.s:(U)%x.%u ", len,
+ name, h.hash,
+ (unsigned) ((char *) de
+ - base));
+ } else {
+ /* Directory is encrypted */
+ res = ext4_fname_crypto_alloc_buffer(
+ ctx, de->name_len,
+ &fname_crypto_str);
+ if (res < 0) {
+ printk(KERN_WARNING "Error "
+ "allocating crypto "
+ "buffer--skipping "
+ "crypto\n");
+ ext4_put_fname_crypto_ctx(&ctx);
+ ctx = NULL;
+ }
+ res = ext4_fname_disk_to_usr(ctx, de,
+ &fname_crypto_str);
+ if (res < 0) {
+ printk(KERN_WARNING "Error "
+ "converting filename "
+ "from disk to usr"
+ "\n");
+ name = "??";
+ len = 2;
+ } else {
+ name = fname_crypto_str.name;
+ len = fname_crypto_str.len;
+ }
+ res = ext4_fname_disk_to_hash(ctx, de,
+ &h);
+ if (res < 0) {
+ printk(KERN_WARNING "Error "
+ "converting filename "
+ "from disk to htree"
+ "\n");
+ h.hash = 0xDEADBEEF;
+ }
+ printk("%*.s:(E)%x.%u ", len, name,
+ h.hash, (unsigned) ((char *) de
+ - base));
+ ext4_put_fname_crypto_ctx(&ctx);
+ ext4_fname_crypto_free_buffer(
+ &fname_crypto_str);
+ }
+#else
int len = de->name_len;
char *name = de->name;
- while (len--) printk("%c", *name++);
ext4fs_dirhash(de->name, de->name_len, &h);
- printk(":%x.%u ", h.hash,
+ printk("%*.s:%x.%u ", len, name, h.hash,
(unsigned) ((char *) de - base));
+#endif
}
space += EXT4_DIR_REC_LEN(de->name_len);
names++;
@@ -623,7 +693,6 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
unsigned count = dx_get_count(entries), names = 0, space = 0, i;
unsigned bcount = 0;
struct buffer_head *bh;
- int err;
printk("%i indexed blocks...\n", count);
for (i = 0; i < count; i++, entries++)
{
@@ -637,7 +706,8 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
continue;
stats = levels?
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
- dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
+ dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
+ bh->b_data, blocksize, 0);
names += stats.names;
space += stats.space;
bcount += stats.bcount;
@@ -687,8 +757,28 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
if (hinfo->hash_version <= DX_HASH_TEA)
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (d_name) {
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ int res;
+
+ /* Check if the directory is encrypted */
+ ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
+ if (IS_ERR(ctx)) {
+ ret_err = ERR_PTR(PTR_ERR(ctx));
+ goto fail;
+ }
+ res = ext4_fname_usr_to_hash(ctx, d_name, hinfo);
+ if (res < 0) {
+ ret_err = ERR_PTR(res);
+ goto fail;
+ }
+ ext4_put_fname_crypto_ctx(&ctx);
+ }
+#else
if (d_name)
ext4fs_dirhash(d_name->name, d_name->len, hinfo);
+#endif
hash = hinfo->hash;
if (root->info.unused_flags & 1) {
@@ -773,6 +863,7 @@ fail:
brelse(frame->bh);
frame--;
}
+
if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
ext4_warning(dir->i_sb,
"Corrupt dir inode %lu, running e2fsck is "
@@ -878,6 +969,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
struct buffer_head *bh;
struct ext4_dir_entry_2 *de, *top;
int err = 0, count = 0;
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}, tmp_str;
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block));
@@ -889,6 +982,24 @@ static int htree_dirblock_to_tree(struct file *dir_file,
top = (struct ext4_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(0));
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ /* Check if the directory is encrypted */
+ ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ brelse(bh);
+ return err;
+ }
+ if (ctx != NULL) {
+ err = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
+ &fname_crypto_str);
+ if (err < 0) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ brelse(bh);
+ return err;
+ }
+ }
+#endif
for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
bh->b_data, bh->b_size,
@@ -897,21 +1008,52 @@ static int htree_dirblock_to_tree(struct file *dir_file,
/* silently ignore the rest of the block */
break;
}
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ err = ext4_fname_disk_to_hash(ctx, de, hinfo);
+ if (err < 0) {
+ count = err;
+ goto errout;
+ }
+#else
ext4fs_dirhash(de->name, de->name_len, hinfo);
+#endif
if ((hinfo->hash < start_hash) ||
((hinfo->hash == start_hash) &&
(hinfo->minor_hash < start_minor_hash)))
continue;
if (de->inode == 0)
continue;
- if ((err = ext4_htree_store_dirent(dir_file,
- hinfo->hash, hinfo->minor_hash, de)) != 0) {
- brelse(bh);
- return err;
+ if (ctx == NULL) {
+ /* Directory is not encrypted */
+ tmp_str.name = de->name;
+ tmp_str.len = de->name_len;
+ err = ext4_htree_store_dirent(dir_file,
+ hinfo->hash, hinfo->minor_hash, de,
+ &tmp_str);
+ } else {
+ /* Directory is encrypted */
+ err = ext4_fname_disk_to_usr(ctx, de,
+ &fname_crypto_str);
+ if (err < 0) {
+ count = err;
+ goto errout;
+ }
+ err = ext4_htree_store_dirent(dir_file,
+ hinfo->hash, hinfo->minor_hash, de,
+ &fname_crypto_str);
+ }
+ if (err != 0) {
+ count = err;
+ goto errout;
}
count++;
}
+errout:
brelse(bh);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ ext4_put_fname_crypto_ctx(&ctx);
+ ext4_fname_crypto_free_buffer(&fname_crypto_str);
+#endif
return count;
}
@@ -935,6 +1077,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
int count = 0;
int ret, err;
__u32 hashval;
+ struct ext4_str tmp_str;
dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
start_hash, start_minor_hash));
@@ -970,14 +1113,22 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
/* Add '.' and '..' from the htree header */
if (!start_hash && !start_minor_hash) {
de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
- if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+ tmp_str.name = de->name;
+ tmp_str.len = de->name_len;
+ err = ext4_htree_store_dirent(dir_file, 0, 0,
+ de, &tmp_str);
+ if (err != 0)
goto errout;
count++;
}
if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
de = ext4_next_entry(de, dir->i_sb->s_blocksize);
- if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
+ tmp_str.name = de->name;
+ tmp_str.len = de->name_len;
+ err = ext4_htree_store_dirent(dir_file, 2, 0,
+ de, &tmp_str);
+ if (err != 0)
goto errout;
count++;
}
@@ -1035,17 +1186,33 @@ static inline int search_dirblock(struct buffer_head *bh,
* Create map of hash values, offsets, and sizes, stored at end of block.
* Returns number of entries mapped.
*/
-static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
- struct dx_hash_info *hinfo,
+static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
+ unsigned blocksize, struct dx_hash_info *hinfo,
struct dx_map_entry *map_tail)
{
int count = 0;
char *base = (char *) de;
struct dx_hash_info h = *hinfo;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ int err;
+
+ ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+#endif
while ((char *) de < base + blocksize) {
if (de->name_len && de->inode) {
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ err = ext4_fname_disk_to_hash(ctx, de, &h);
+ if (err < 0) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ return err;
+ }
+#else
ext4fs_dirhash(de->name, de->name_len, &h);
+#endif
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
@@ -1056,6 +1223,9 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
/* XXX: do we need to check rec_len == 0 case? -Chris */
de = ext4_next_entry(de, blocksize);
}
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ ext4_put_fname_crypto_ctx(&ctx);
+#endif
return count;
}
@@ -1106,57 +1276,107 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
* `len <= EXT4_NAME_LEN' is guaranteed by caller.
* `de != NULL' is guaranteed by caller.
*/
-static inline int ext4_match (int len, const char * const name,
- struct ext4_dir_entry_2 * de)
+static inline int ext4_match(struct ext4_fname_crypto_ctx *ctx,
+ struct ext4_str *fname_crypto_str,
+ int len, const char * const name,
+ struct ext4_dir_entry_2 *de)
{
- if (len != de->name_len)
- return 0;
+ int res;
+
if (!de->inode)
return 0;
- return !memcmp(name, de->name, len);
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (ctx) {
+ /* Directory is encrypted */
+ res = ext4_fname_disk_to_usr(ctx, de, fname_crypto_str);
+ if (res < 0)
+ return res;
+ if (len != res)
+ return 0;
+ res = memcmp(name, fname_crypto_str->name, len);
+ return (res == 0) ? 1 : 0;
+ }
+#endif
+ if (len != de->name_len)
+ return 0;
+ res = memcmp(name, de->name, len);
+ return (res == 0) ? 1 : 0;
}
/*
* Returns 0 if not found, -1 on failure, and 1 on success
*/
-int search_dir(struct buffer_head *bh,
- char *search_buf,
- int buf_size,
- struct inode *dir,
- const struct qstr *d_name,
- unsigned int offset,
- struct ext4_dir_entry_2 **res_dir)
+int search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
+ struct inode *dir, const struct qstr *d_name,
+ unsigned int offset, struct ext4_dir_entry_2 **res_dir)
{
struct ext4_dir_entry_2 * de;
char * dlimit;
int de_len;
const char *name = d_name->name;
int namelen = d_name->len;
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
+ int res;
+
+ ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
+ if (IS_ERR(ctx))
+ return -1;
+
+ if (ctx != NULL) {
+ /* Allocate buffer to hold maximum name length */
+ res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
+ &fname_crypto_str);
+ if (res < 0) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ return -1;
+ }
+ }
de = (struct ext4_dir_entry_2 *)search_buf;
dlimit = search_buf + buf_size;
while ((char *) de < dlimit) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
+ if ((char *) de + de->name_len <= dlimit) {
+ res = ext4_match(ctx, &fname_crypto_str, namelen,
+ name, de);
+ if (res < 0) {
+ res = -1;
+ goto return_result;
+ }
+ if (res > 0) {
+ /* found a match - just to be sure, do
+ * a full check */
+ if (ext4_check_dir_entry(dir, NULL, de, bh,
+ bh->b_data,
+ bh->b_size, offset)) {
+ res = -1;
+ goto return_result;
+ }
+ *res_dir = de;
+ res = 1;
+ goto return_result;
+ }
- if ((char *) de + namelen <= dlimit &&
- ext4_match (namelen, name, de)) {
- /* found a match - just to be sure, do a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
- bh->b_size, offset))
- return -1;
- *res_dir = de;
- return 1;
}
/* prevent looping on a bad block */
de_len = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
- if (de_len <= 0)
- return -1;
+ if (de_len <= 0) {
+ res = -1;
+ goto return_result;
+ }
offset += de_len;
de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
}
- return 0;
+
+ res = 0;
+return_result:
+ ext4_put_fname_crypto_ctx(&ctx);
+ ext4_fname_crypto_free_buffer(&fname_crypto_str);
+ return res;
}
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
@@ -1345,6 +1565,9 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
ext4_lblk_t block;
int retval;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ *res_dir = NULL;
+#endif
frame = dx_probe(d_name, dir, &hinfo, frames);
if (IS_ERR(frame))
return (struct buffer_head *) frame;
@@ -1417,6 +1640,18 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
ino);
return ERR_PTR(-EIO);
}
+ if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
+ (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) &&
+ !ext4_is_child_context_consistent_with_parent(dir,
+ inode)) {
+ iput(inode);
+ ext4_warning(inode->i_sb,
+ "Inconsistent encryption contexts: %lu/%lu\n",
+ (unsigned long) dir->i_ino,
+ (unsigned long) inode->i_ino);
+ return ERR_PTR(-EPERM);
+ }
}
return d_splice_alias(inode, dentry);
}
@@ -1541,7 +1776,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
/* create map in the end of data2 block */
map = (struct dx_map_entry *) (data2 + blocksize);
- count = dx_make_map((struct ext4_dir_entry_2 *) data1,
+ count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
dx_sort_map(map, count);
@@ -1564,7 +1799,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
hash2, split, count-split));
/* Fancy dance to stay within two buffers */
- de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
+ de2 = dx_move_dirents(data1, data2, map + split, count - split,
+ blocksize);
de = dx_pack_dirents(data1, blocksize);
de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
(char *) de,
@@ -1580,8 +1816,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
initialize_dirent_tail(t, blocksize);
}
- dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
- dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
+ dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
+ blocksize, 1));
+ dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
+ blocksize, 1));
/* Which block gets the new entry? */
if (hinfo->hash >= hash2) {
@@ -1618,15 +1856,48 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
int nlen, rlen;
unsigned int offset = 0;
char *top;
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
+ int res;
+
+ ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
+ if (IS_ERR(ctx))
+ return -1;
+
+ if (ctx != NULL) {
+ /* Calculate record length needed to store the entry */
+ res = ext4_fname_crypto_namelen_on_disk(ctx, namelen);
+ if (res < 0) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ return res;
+ }
+ reclen = EXT4_DIR_REC_LEN(res);
+
+ /* Allocate buffer to hold maximum name length */
+ res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
+ &fname_crypto_str);
+ if (res < 0) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ return -1;
+ }
+ }
de = (struct ext4_dir_entry_2 *)buf;
top = buf + buf_size - reclen;
while ((char *) de <= top) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- buf, buf_size, offset))
- return -EIO;
- if (ext4_match(namelen, name, de))
- return -EEXIST;
+ buf, buf_size, offset)) {
+ res = -EIO;
+ goto return_result;
+ }
+ /* Provide crypto context and crypto buffer to ext4 match */
+ res = ext4_match(ctx, &fname_crypto_str, namelen, name, de);
+ if (res < 0)
+ goto return_result;
+ if (res > 0) {
+ res = -EEXIST;
+ goto return_result;
+ }
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if ((de->inode ? rlen - nlen : rlen) >= reclen)
@@ -1634,26 +1905,62 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
offset += rlen;
}
- if ((char *) de > top)
- return -ENOSPC;
- *dest_de = de;
- return 0;
+ if ((char *) de > top)
+ res = -ENOSPC;
+ else {
+ *dest_de = de;
+ res = 0;
+ }
+return_result:
+ ext4_put_fname_crypto_ctx(&ctx);
+ ext4_fname_crypto_free_buffer(&fname_crypto_str);
+ return res;
}
-void ext4_insert_dentry(struct inode *inode,
- struct ext4_dir_entry_2 *de,
- int buf_size,
- const char *name, int namelen)
+int ext4_insert_dentry(struct inode *dir,
+ struct inode *inode,
+ struct ext4_dir_entry_2 *de,
+ int buf_size,
+ const struct qstr *iname,
+ const char *name, int namelen)
{
int nlen, rlen;
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
+ struct ext4_str tmp_str;
+ int res;
+
+ ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
+ if (IS_ERR(ctx))
+ return -EIO;
+ /* By default, the input name would be written to the disk */
+ tmp_str.name = (unsigned char *)name;
+ tmp_str.len = namelen;
+ if (ctx != NULL) {
+ /* Directory is encrypted */
+ res = ext4_fname_crypto_alloc_buffer(ctx, EXT4_NAME_LEN,
+ &fname_crypto_str);
+ if (res < 0) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ return -ENOMEM;
+ }
+ res = ext4_fname_usr_to_disk(ctx, iname, &fname_crypto_str);
+ if (res < 0) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ ext4_fname_crypto_free_buffer(&fname_crypto_str);
+ return res;
+ }
+ tmp_str.name = fname_crypto_str.name;
+ tmp_str.len = fname_crypto_str.len;
+ }
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if (de->inode) {
struct ext4_dir_entry_2 *de1 =
- (struct ext4_dir_entry_2 *)((char *)de + nlen);
+ (struct ext4_dir_entry_2 *)((char *)de + nlen);
de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
de = de1;
@@ -1661,9 +1968,14 @@ void ext4_insert_dentry(struct inode *inode,
de->file_type = EXT4_FT_UNKNOWN;
de->inode = cpu_to_le32(inode->i_ino);
ext4_set_de_type(inode->i_sb, de, inode->i_mode);
- de->name_len = namelen;
- memcpy(de->name, name, namelen);
+ de->name_len = tmp_str.len;
+
+ memcpy(de->name, tmp_str.name, tmp_str.len);
+ ext4_put_fname_crypto_ctx(&ctx);
+ ext4_fname_crypto_free_buffer(&fname_crypto_str);
+ return 0;
}
+
/*
* Add a new entry into a directory (leaf) block. If de is non-NULL,
* it points to a directory entry which is guaranteed to be large
@@ -1700,8 +2012,12 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
return err;
}
- /* By now the buffer is marked for journaling */
- ext4_insert_dentry(inode, de, blocksize, name, namelen);
+ /* By now the buffer is marked for journaling. Due to crypto operations,
+ * the following function call may fail */
+ err = ext4_insert_dentry(dir, inode, de, blocksize, &dentry->d_name,
+ name, namelen);
+ if (err < 0)
+ return err;
/*
* XXX shouldn't update any times until successful
@@ -1733,8 +2049,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
struct inode *inode, struct buffer_head *bh)
{
struct inode *dir = dentry->d_parent->d_inode;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ int res;
+#else
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
+#endif
struct buffer_head *bh2;
struct dx_root *root;
struct dx_frame frames[2], *frame;
@@ -1748,7 +2069,13 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
struct dx_hash_info hinfo;
ext4_lblk_t block;
struct fake_dirent *fde;
- int csum_size = 0;
+ int csum_size = 0;
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ ctx = ext4_get_fname_crypto_ctx(dir, EXT4_NAME_LEN);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+#endif
if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);
@@ -1815,7 +2142,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
if (hinfo.hash_version <= DX_HASH_TEA)
hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ res = ext4_fname_usr_to_hash(ctx, &dentry->d_name, &hinfo);
+ if (res < 0) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ ext4_mark_inode_dirty(handle, dir);
+ brelse(bh);
+ return res;
+ }
+ ext4_put_fname_crypto_ctx(&ctx);
+#else
ext4fs_dirhash(name, namelen, &hinfo);
+#endif
memset(frames, 0, sizeof(frames));
frame = frames;
frame->entries = entries;
@@ -1865,7 +2203,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
struct inode *dir = dentry->d_parent->d_inode;
- struct buffer_head *bh;
+ struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
struct ext4_dir_entry_tail *t;
struct super_block *sb;
@@ -1889,14 +2227,14 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval;
if (retval == 1) {
retval = 0;
- return retval;
+ goto out;
}
}
if (is_dx(dir)) {
retval = ext4_dx_add_entry(handle, dentry, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
- return retval;
+ goto out;
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
dx_fallback++;
ext4_mark_inode_dirty(handle, dir);
@@ -1908,14 +2246,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return PTR_ERR(bh);
retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
- if (retval != -ENOSPC) {
- brelse(bh);
- return retval;
- }
+ if (retval != -ENOSPC)
+ goto out;
if (blocks == 1 && !dx_fallback &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
- return make_indexed_dir(handle, dentry, inode, bh);
+ EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
+ retval = make_indexed_dir(handle, dentry, inode, bh);
+ bh = NULL; /* make_indexed_dir releases bh */
+ goto out;
+ }
brelse(bh);
}
bh = ext4_append(handle, dir, &block);
@@ -1931,6 +2270,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
}
retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
+out:
brelse(bh);
if (retval == 0)
ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -2235,12 +2575,22 @@ retry:
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
- if (test_opt(inode->i_sb, DAX))
- inode->i_fop = &ext4_dax_file_operations;
- else
- inode->i_fop = &ext4_file_operations;
+ inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
- err = ext4_add_nondir(handle, dentry, inode);
+ err = 0;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (!err && (ext4_encrypted_inode(dir) ||
+ DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)))) {
+ err = ext4_inherit_context(dir, inode);
+ if (err) {
+ clear_nlink(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ }
+ }
+#endif
+ if (!err)
+ err = ext4_add_nondir(handle, dentry, inode);
if (!err && IS_DIRSYNC(dir))
ext4_handle_sync(handle);
}
@@ -2302,10 +2652,7 @@ retry:
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
- if (test_opt(inode->i_sb, DAX))
- inode->i_fop = &ext4_dax_file_operations;
- else
- inode->i_fop = &ext4_file_operations;
+ inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
d_tmpfile(dentry, inode);
err = ext4_orphan_add(handle, inode);
@@ -2424,6 +2771,14 @@ retry:
err = ext4_init_new_dir(handle, dir, inode);
if (err)
goto out_clear_inode;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (ext4_encrypted_inode(dir) ||
+ DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) {
+ err = ext4_inherit_context(dir, inode);
+ if (err)
+ goto out_clear_inode;
+ }
+#endif
err = ext4_mark_inode_dirty(handle, inode);
if (!err)
err = ext4_add_entry(handle, dentry, inode);
@@ -2456,7 +2811,7 @@ out_stop:
/*
* routine to check that the specified directory is empty (for rmdir)
*/
-static int empty_dir(struct inode *inode)
+int ext4_empty_dir(struct inode *inode)
{
unsigned int offset;
struct buffer_head *bh;
@@ -2724,7 +3079,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
goto end_rmdir;
retval = -ENOTEMPTY;
- if (!empty_dir(inode))
+ if (!ext4_empty_dir(inode))
goto end_rmdir;
handle = ext4_journal_start(dir, EXT4_HT_DIR,
@@ -2834,16 +3189,25 @@ static int ext4_symlink(struct inode *dir,
{
handle_t *handle;
struct inode *inode;
- int l, err, retries = 0;
+ int err, len = strlen(symname);
int credits;
-
- l = strlen(symname)+1;
- if (l > dir->i_sb->s_blocksize)
+ bool encryption_required;
+ struct ext4_str disk_link;
+ struct ext4_encrypted_symlink_data *sd = NULL;
+
+ disk_link.len = len + 1;
+ disk_link.name = (char *) symname;
+
+ encryption_required = (ext4_encrypted_inode(dir) ||
+ DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb)));
+ if (encryption_required)
+ disk_link.len = encrypted_symlink_data_len(len) + 1;
+ if (disk_link.len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
dquot_initialize(dir);
- if (l > EXT4_N_BLOCKS * 4) {
+ if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
/*
* For non-fast symlinks, we just allocate inode and put it on
* orphan list in the first transaction => we need bitmap,
@@ -2862,16 +3226,49 @@ static int ext4_symlink(struct inode *dir,
credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
}
-retry:
+
inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
&dentry->d_name, 0, NULL,
EXT4_HT_DIR, credits);
handle = ext4_journal_current_handle();
- err = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto out_stop;
+ if (IS_ERR(inode)) {
+ if (handle)
+ ext4_journal_stop(handle);
+ return PTR_ERR(inode);
+ }
+
+ if (encryption_required) {
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ struct qstr istr;
+ struct ext4_str ostr;
+
+ sd = kzalloc(disk_link.len, GFP_NOFS);
+ if (!sd) {
+ err = -ENOMEM;
+ goto err_drop_inode;
+ }
+ err = ext4_inherit_context(dir, inode);
+ if (err)
+ goto err_drop_inode;
+ ctx = ext4_get_fname_crypto_ctx(inode,
+ inode->i_sb->s_blocksize);
+ if (IS_ERR_OR_NULL(ctx)) {
+ /* We just set the policy, so ctx should not be NULL */
+ err = (ctx == NULL) ? -EIO : PTR_ERR(ctx);
+ goto err_drop_inode;
+ }
+ istr.name = (const unsigned char *) symname;
+ istr.len = len;
+ ostr.name = sd->encrypted_path;
+ err = ext4_fname_usr_to_disk(ctx, &istr, &ostr);
+ ext4_put_fname_crypto_ctx(&ctx);
+ if (err < 0)
+ goto err_drop_inode;
+ sd->len = cpu_to_le16(ostr.len);
+ disk_link.name = (char *) sd;
+ }
- if (l > EXT4_N_BLOCKS * 4) {
+ if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
/*
@@ -2887,9 +3284,10 @@ retry:
drop_nlink(inode);
err = ext4_orphan_add(handle, inode);
ext4_journal_stop(handle);
+ handle = NULL;
if (err)
goto err_drop_inode;
- err = __page_symlink(inode, symname, l, 1);
+ err = __page_symlink(inode, disk_link.name, disk_link.len, 1);
if (err)
goto err_drop_inode;
/*
@@ -2901,34 +3299,37 @@ retry:
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
+ handle = NULL;
goto err_drop_inode;
}
set_nlink(inode, 1);
err = ext4_orphan_del(handle, inode);
- if (err) {
- ext4_journal_stop(handle);
- clear_nlink(inode);
+ if (err)
goto err_drop_inode;
- }
} else {
/* clear the extent format for fast symlink */
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
- inode->i_op = &ext4_fast_symlink_inode_operations;
- memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
- inode->i_size = l-1;
+ inode->i_op = encryption_required ?
+ &ext4_symlink_inode_operations :
+ &ext4_fast_symlink_inode_operations;
+ memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
+ disk_link.len);
+ inode->i_size = disk_link.len - 1;
}
EXT4_I(inode)->i_disksize = inode->i_size;
err = ext4_add_nondir(handle, dentry, inode);
if (!err && IS_DIRSYNC(dir))
ext4_handle_sync(handle);
-out_stop:
if (handle)
ext4_journal_stop(handle);
- if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
- goto retry;
+ kfree(sd);
return err;
err_drop_inode:
+ if (handle)
+ ext4_journal_stop(handle);
+ kfree(sd);
+ clear_nlink(inode);
unlock_new_inode(inode);
iput(inode);
return err;
@@ -2943,7 +3344,9 @@ static int ext4_link(struct dentry *old_dentry,
if (inode->i_nlink >= EXT4_LINK_MAX)
return -EMLINK;
-
+ if (ext4_encrypted_inode(dir) &&
+ !ext4_is_child_context_consistent_with_parent(dir, inode))
+ return -EPERM;
dquot_initialize(dir);
retry:
@@ -3244,6 +3647,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
goto end_rename;
+ if ((old.dir != new.dir) &&
+ ext4_encrypted_inode(new.dir) &&
+ !ext4_is_child_context_consistent_with_parent(new.dir,
+ old.inode)) {
+ retval = -EPERM;
+ goto end_rename;
+ }
+
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined);
if (IS_ERR(new.bh)) {
@@ -3264,12 +3675,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
if (!(flags & RENAME_WHITEOUT)) {
handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ retval = PTR_ERR(handle);
+ handle = NULL;
+ goto end_rename;
+ }
} else {
whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
- if (IS_ERR(whiteout))
- return PTR_ERR(whiteout);
+ if (IS_ERR(whiteout)) {
+ retval = PTR_ERR(whiteout);
+ whiteout = NULL;
+ goto end_rename;
+ }
}
if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
@@ -3278,7 +3695,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (S_ISDIR(old.inode->i_mode)) {
if (new.inode) {
retval = -ENOTEMPTY;
- if (!empty_dir(new.inode))
+ if (!ext4_empty_dir(new.inode))
goto end_rename;
} else {
retval = -EMLINK;
@@ -3352,8 +3769,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
ext4_dec_count(handle, old.dir);
if (new.inode) {
- /* checked empty_dir above, can't have another parent,
- * ext4_dec_count() won't work for many-linked dirs */
+ /* checked ext4_empty_dir above, can't have another
+ * parent, ext4_dec_count() won't work for many-linked
+ * dirs */
clear_nlink(new.inode);
} else {
ext4_inc_count(handle, new.dir);
@@ -3433,8 +3851,11 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
(2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ retval = PTR_ERR(handle);
+ handle = NULL;
+ goto end_rename;
+ }
if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
ext4_handle_sync(handle);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b24a2541a9ba..5765f88b3904 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -8,7 +8,6 @@
#include <linux/fs.h>
#include <linux/time.h>
-#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
@@ -18,14 +17,12 @@
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
-#include <linux/aio.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/ratelimit.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -69,6 +66,10 @@ static void ext4_finish_bio(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ struct page *data_page = NULL;
+ struct ext4_crypto_ctx *ctx = NULL;
+#endif
struct buffer_head *bh, *head;
unsigned bio_start = bvec->bv_offset;
unsigned bio_end = bio_start + bvec->bv_len;
@@ -78,6 +79,15 @@ static void ext4_finish_bio(struct bio *bio)
if (!page)
continue;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (!page->mapping) {
+ /* The bounce data pages are unmapped. */
+ data_page = page;
+ ctx = (struct ext4_crypto_ctx *)page_private(data_page);
+ page = ctx->control_page;
+ }
+#endif
+
if (error) {
SetPageError(page);
set_bit(AS_EIO, &page->mapping->flags);
@@ -102,8 +112,13 @@ static void ext4_finish_bio(struct bio *bio)
} while ((bh = bh->b_this_page) != head);
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
local_irq_restore(flags);
- if (!under_io)
+ if (!under_io) {
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ if (ctx)
+ ext4_restore_control_page(data_page);
+#endif
end_page_writeback(page);
+ }
}
}
@@ -378,6 +393,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
static int io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
+ struct page *page,
struct buffer_head *bh)
{
int ret;
@@ -391,7 +407,7 @@ submit_and_retry:
if (ret)
return ret;
}
- ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+ ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
io->io_next_block++;
@@ -404,6 +420,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
struct writeback_control *wbc,
bool keep_towrite)
{
+ struct page *data_page = NULL;
struct inode *inode = page->mapping->host;
unsigned block_start, blocksize;
struct buffer_head *bh, *head;
@@ -463,19 +480,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
set_buffer_async_write(bh);
} while ((bh = bh->b_this_page) != head);
- /* Now submit buffers to write */
bh = head = page_buffers(page);
+
+ if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+ data_page = ext4_encrypt(inode, page);
+ if (IS_ERR(data_page)) {
+ ret = PTR_ERR(data_page);
+ data_page = NULL;
+ goto out;
+ }
+ }
+
+ /* Now submit buffers to write */
do {
if (!buffer_async_write(bh))
continue;
- ret = io_submit_add_bh(io, inode, bh);
+ ret = io_submit_add_bh(io, inode,
+ data_page ? data_page : page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
* we can do but mark the page as dirty, and
* better luck next time.
*/
- redirty_page_for_writepage(wbc, page);
break;
}
nr_submitted++;
@@ -484,6 +511,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
/* Error stopped previous loop? Clean up buffers... */
if (ret) {
+ out:
+ if (data_page)
+ ext4_restore_control_page(data_page);
+ printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
+ redirty_page_for_writepage(wbc, page);
do {
clear_buffer_async_write(bh);
bh = bh->b_this_page;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
new file mode 100644
index 000000000000..171b9ac4b45e
--- /dev/null
+++ b/fs/ext4/readpage.c
@@ -0,0 +1,328 @@
+/*
+ * linux/fs/ext4/readpage.c
+ *
+ * Copyright (C) 2002, Linus Torvalds.
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This was originally taken from fs/mpage.c
+ *
+ * The intent is the ext4_mpage_readpages() function here is intended
+ * to replace mpage_readpages() in the general case, not just for
+ * encrypted files. It has some limitations (see below), where it
+ * will fall back to read_block_full_page(), but these limitations
+ * should only be hit when page_size != block_size.
+ *
+ * This will allow us to attach a callback function to support ext4
+ * encryption.
+ *
+ * If anything unusual happens, such as:
+ *
+ * - encountering a page which has buffers
+ * - encountering a page which has a non-hole after a hole
+ * - encountering a page with non-contiguous blocks
+ *
+ * then this code just gives up and calls the buffer_head-based read function.
+ * It does handle a page which has holes at the end - that is a common case:
+ * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/kdev_t.h>
+#include <linux/gfp.h>
+#include <linux/bio.h>
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
+#include <linux/highmem.h>
+#include <linux/prefetch.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
+#include <linux/backing-dev.h>
+#include <linux/pagevec.h>
+#include <linux/cleancache.h>
+
+#include "ext4.h"
+
+/*
+ * Call ext4_decrypt on every single page, reusing the encryption
+ * context.
+ */
+static void completion_pages(struct work_struct *work)
+{
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ struct ext4_crypto_ctx *ctx =
+ container_of(work, struct ext4_crypto_ctx, work);
+ struct bio *bio = ctx->bio;
+ struct bio_vec *bv;
+ int i;
+
+ bio_for_each_segment_all(bv, bio, i) {
+ struct page *page = bv->bv_page;
+
+ int ret = ext4_decrypt(ctx, page);
+ if (ret) {
+ WARN_ON_ONCE(1);
+ SetPageError(page);
+ } else
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ ext4_release_crypto_ctx(ctx);
+ bio_put(bio);
+#else
+ BUG();
+#endif
+}
+
+static inline bool ext4_bio_encrypted(struct bio *bio)
+{
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ return unlikely(bio->bi_private != NULL);
+#else
+ return false;
+#endif
+}
+
+/*
+ * I/O completion handler for multipage BIOs.
+ *
+ * The mpage code never puts partial pages into a BIO (except for end-of-file).
+ * If a page does not map to a contiguous run of blocks then it simply falls
+ * back to block_read_full_page().
+ *
+ * Why is this? If a page's completion depends on a number of different BIOs
+ * which can complete in any order (or at the same time) then determining the
+ * status of that page is hard. See end_buffer_async_read() for the details.
+ * There is no point in duplicating all that complexity.
+ */
+static void mpage_end_io(struct bio *bio, int err)
+{
+ struct bio_vec *bv;
+ int i;
+
+ if (ext4_bio_encrypted(bio)) {
+ struct ext4_crypto_ctx *ctx = bio->bi_private;
+
+ if (err) {
+ ext4_release_crypto_ctx(ctx);
+ } else {
+ INIT_WORK(&ctx->work, completion_pages);
+ ctx->bio = bio;
+ queue_work(ext4_read_workqueue, &ctx->work);
+ return;
+ }
+ }
+ bio_for_each_segment_all(bv, bio, i) {
+ struct page *page = bv->bv_page;
+
+ if (!err) {
+ SetPageUptodate(page);
+ } else {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+ unlock_page(page);
+ }
+
+ bio_put(bio);
+}
+
+int ext4_mpage_readpages(struct address_space *mapping,
+ struct list_head *pages, struct page *page,
+ unsigned nr_pages)
+{
+ struct bio *bio = NULL;
+ unsigned page_idx;
+ sector_t last_block_in_bio = 0;
+
+ struct inode *inode = mapping->host;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ sector_t block_in_file;
+ sector_t last_block;
+ sector_t last_block_in_file;
+ sector_t blocks[MAX_BUF_PER_PAGE];
+ unsigned page_block;
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ int length;
+ unsigned relative_block = 0;
+ struct ext4_map_blocks map;
+
+ map.m_pblk = 0;
+ map.m_lblk = 0;
+ map.m_len = 0;
+ map.m_flags = 0;
+
+ for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
+ int fully_mapped = 1;
+ unsigned first_hole = blocks_per_page;
+
+ prefetchw(&page->flags);
+ if (pages) {
+ page = list_entry(pages->prev, struct page, lru);
+ list_del(&page->lru);
+ if (add_to_page_cache_lru(page, mapping,
+ page->index, GFP_KERNEL))
+ goto next_page;
+ }
+
+ if (page_has_buffers(page))
+ goto confused;
+
+ block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+ last_block = block_in_file + nr_pages * blocks_per_page;
+ last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
+ if (last_block > last_block_in_file)
+ last_block = last_block_in_file;
+ page_block = 0;
+
+ /*
+ * Map blocks using the previous result first.
+ */
+ if ((map.m_flags & EXT4_MAP_MAPPED) &&
+ block_in_file > map.m_lblk &&
+ block_in_file < (map.m_lblk + map.m_len)) {
+ unsigned map_offset = block_in_file - map.m_lblk;
+ unsigned last = map.m_len - map_offset;
+
+ for (relative_block = 0; ; relative_block++) {
+ if (relative_block == last) {
+ /* needed? */
+ map.m_flags &= ~EXT4_MAP_MAPPED;
+ break;
+ }
+ if (page_block == blocks_per_page)
+ break;
+ blocks[page_block] = map.m_pblk + map_offset +
+ relative_block;
+ page_block++;
+ block_in_file++;
+ }
+ }
+
+ /*
+ * Then do more ext4_map_blocks() calls until we are
+ * done with this page.
+ */
+ while (page_block < blocks_per_page) {
+ if (block_in_file < last_block) {
+ map.m_lblk = block_in_file;
+ map.m_len = last_block - block_in_file;
+
+ if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
+ set_error_page:
+ SetPageError(page);
+ zero_user_segment(page, 0,
+ PAGE_CACHE_SIZE);
+ unlock_page(page);
+ goto next_page;
+ }
+ }
+ if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
+ fully_mapped = 0;
+ if (first_hole == blocks_per_page)
+ first_hole = page_block;
+ page_block++;
+ block_in_file++;
+ continue;
+ }
+ if (first_hole != blocks_per_page)
+ goto confused; /* hole -> non-hole */
+
+ /* Contiguous blocks? */
+ if (page_block && blocks[page_block-1] != map.m_pblk-1)
+ goto confused;
+ for (relative_block = 0; ; relative_block++) {
+ if (relative_block == map.m_len) {
+ /* needed? */
+ map.m_flags &= ~EXT4_MAP_MAPPED;
+ break;
+ } else if (page_block == blocks_per_page)
+ break;
+ blocks[page_block] = map.m_pblk+relative_block;
+ page_block++;
+ block_in_file++;
+ }
+ }
+ if (first_hole != blocks_per_page) {
+ zero_user_segment(page, first_hole << blkbits,
+ PAGE_CACHE_SIZE);
+ if (first_hole == 0) {
+ SetPageUptodate(page);
+ unlock_page(page);
+ goto next_page;
+ }
+ } else if (fully_mapped) {
+ SetPageMappedToDisk(page);
+ }
+ if (fully_mapped && blocks_per_page == 1 &&
+ !PageUptodate(page) && cleancache_get_page(page) == 0) {
+ SetPageUptodate(page);
+ goto confused;
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this
+ * BIO off first?
+ */
+ if (bio && (last_block_in_bio != blocks[0] - 1)) {
+ submit_and_realloc:
+ submit_bio(READ, bio);
+ bio = NULL;
+ }
+ if (bio == NULL) {
+ struct ext4_crypto_ctx *ctx = NULL;
+
+ if (ext4_encrypted_inode(inode) &&
+ S_ISREG(inode->i_mode)) {
+ ctx = ext4_get_crypto_ctx(inode);
+ if (IS_ERR(ctx))
+ goto set_error_page;
+ }
+ bio = bio_alloc(GFP_KERNEL,
+ min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
+ if (!bio) {
+ if (ctx)
+ ext4_release_crypto_ctx(ctx);
+ goto set_error_page;
+ }
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
+ bio->bi_end_io = mpage_end_io;
+ bio->bi_private = ctx;
+ }
+
+ length = first_hole << blkbits;
+ if (bio_add_page(bio, page, length, 0) < length)
+ goto submit_and_realloc;
+
+ if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
+ (relative_block == map.m_len)) ||
+ (first_hole != blocks_per_page)) {
+ submit_bio(READ, bio);
+ bio = NULL;
+ } else
+ last_block_in_bio = blocks[blocks_per_page - 1];
+ goto next_page;
+ confused:
+ if (bio) {
+ submit_bio(READ, bio);
+ bio = NULL;
+ }
+ if (!PageUptodate(page))
+ block_read_full_page(page, ext4_get_block);
+ else
+ unlock_page(page);
+ next_page:
+ if (pages)
+ page_cache_release(page);
+ }
+ BUG_ON(pages && !list_empty(pages));
+ if (bio)
+ submit_bio(READ, bio);
+ return 0;
+}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e061e66c8280..821f22dbe825 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,7 +21,6 @@
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
-#include <linux/jbd2.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
@@ -323,22 +322,6 @@ static void save_error_info(struct super_block *sb, const char *func,
ext4_commit_super(sb, 1);
}
-/*
- * The del_gendisk() function uninitializes the disk-specific data
- * structures, including the bdi structure, without telling anyone
- * else. Once this happens, any attempt to call mark_buffer_dirty()
- * (for example, by ext4_commit_super), will cause a kernel OOPS.
- * This is a kludge to prevent these oops until we can put in a proper
- * hook in del_gendisk() to inform the VFS and file system layers.
- */
-static int block_device_ejected(struct super_block *sb)
-{
- struct inode *bd_inode = sb->s_bdev->bd_inode;
- struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
-
- return bdi->dev == NULL;
-}
-
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
struct super_block *sb = journal->j_private;
@@ -893,6 +876,9 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
atomic_set(&ei->i_ioend_count, 0);
atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ ei->i_encryption_key.mode = EXT4_ENCRYPTION_MODE_INVALID;
+#endif
return &ei->vfs_inode;
}
@@ -1076,7 +1062,7 @@ static const struct quotactl_ops ext4_qctl_operations = {
.quota_on = ext4_quota_on,
.quota_off = ext4_quota_off,
.quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
+ .get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
.set_dqblk = dquot_set_dqblk
@@ -1120,7 +1106,7 @@ enum {
Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
- Opt_data_err_abort, Opt_data_err_ignore,
+ Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
@@ -1211,6 +1197,7 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
+ {Opt_test_dummy_encryption, "test_dummy_encryption"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1412,6 +1399,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
+ {Opt_test_dummy_encryption, 0, MOPT_GTE0},
{Opt_err, 0, 0}
};
@@ -1588,6 +1576,15 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
*journal_ioprio =
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
+ } else if (token == Opt_test_dummy_encryption) {
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
+ ext4_msg(sb, KERN_WARNING,
+ "Test dummy encryption mode enabled");
+#else
+ ext4_msg(sb, KERN_WARNING,
+ "Test dummy encryption mount option ignored");
+#endif
} else if (m->flags & MOPT_DATAJ) {
if (is_remount) {
if (!sbi->s_journal)
@@ -2685,11 +2682,13 @@ static struct attribute *ext4_attrs[] = {
EXT4_INFO_ATTR(lazy_itable_init);
EXT4_INFO_ATTR(batched_discard);
EXT4_INFO_ATTR(meta_bg_resize);
+EXT4_INFO_ATTR(encryption);
static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init),
ATTR_LIST(batched_discard),
ATTR_LIST(meta_bg_resize),
+ ATTR_LIST(encryption),
NULL,
};
@@ -3448,6 +3447,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sb->s_bdev->bd_part)
sbi->s_sectors_written_start =
part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ /* Modes of operations for file and directory encryption. */
+ sbi->s_file_encryption_mode = EXT4_ENCRYPTION_MODE_AES_256_XTS;
+ sbi->s_dir_encryption_mode = EXT4_ENCRYPTION_MODE_INVALID;
+#endif
/* Cleanup superblock name */
for (cp = sb->s_id; (cp = strchr(cp, '/'));)
@@ -3692,6 +3696,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
}
+ if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT) &&
+ es->s_encryption_level) {
+ ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
+ es->s_encryption_level);
+ goto failed_mount;
+ }
+
if (sb->s_blocksize != blocksize) {
/* Validate the filesystem blocksize */
if (!sb_set_blocksize(sb, blocksize)) {
@@ -4054,6 +4065,13 @@ no_journal:
}
}
+ if (unlikely(sbi->s_mount_flags & EXT4_MF_TEST_DUMMY_ENCRYPTION) &&
+ !(sb->s_flags & MS_RDONLY) &&
+ !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT)) {
+ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_ENCRYPT);
+ ext4_commit_super(sb, 1);
+ }
+
/*
* Get the # of file system overhead blocks from the
* superblock if present.
@@ -4570,7 +4588,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
- if (!sbh || block_device_ejected(sb))
+ if (!sbh)
return error;
if (buffer_write_io_error(sbh)) {
/*
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index ff3711932018..136ca0e911fd 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -18,13 +18,101 @@
*/
#include <linux/fs.h>
-#include <linux/jbd2.h>
#include <linux/namei.h>
#include "ext4.h"
#include "xattr.h"
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
{
+ struct page *cpage = NULL;
+ char *caddr, *paddr = NULL;
+ struct ext4_str cstr, pstr;
+ struct inode *inode = dentry->d_inode;
+ struct ext4_fname_crypto_ctx *ctx = NULL;
+ struct ext4_encrypted_symlink_data *sd;
+ loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
+ int res;
+ u32 plen, max_size = inode->i_sb->s_blocksize;
+
+ if (!ext4_encrypted_inode(inode))
+ return page_follow_link_light(dentry, nd);
+
+ ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ if (ext4_inode_is_fast_symlink(inode)) {
+ caddr = (char *) EXT4_I(dentry->d_inode)->i_data;
+ max_size = sizeof(EXT4_I(dentry->d_inode)->i_data);
+ } else {
+ cpage = read_mapping_page(inode->i_mapping, 0, NULL);
+ if (IS_ERR(cpage)) {
+ ext4_put_fname_crypto_ctx(&ctx);
+ return cpage;
+ }
+ caddr = kmap(cpage);
+ caddr[size] = 0;
+ }
+
+ /* Symlink is encrypted */
+ sd = (struct ext4_encrypted_symlink_data *)caddr;
+ cstr.name = sd->encrypted_path;
+ cstr.len = le32_to_cpu(sd->len);
+ if ((cstr.len +
+ sizeof(struct ext4_encrypted_symlink_data) - 1) >
+ max_size) {
+ /* Symlink data on the disk is corrupted */
+ res = -EIO;
+ goto errout;
+ }
+ plen = (cstr.len < EXT4_FNAME_CRYPTO_DIGEST_SIZE*2) ?
+ EXT4_FNAME_CRYPTO_DIGEST_SIZE*2 : cstr.len;
+ paddr = kmalloc(plen + 1, GFP_NOFS);
+ if (!paddr) {
+ res = -ENOMEM;
+ goto errout;
+ }
+ pstr.name = paddr;
+ res = _ext4_fname_disk_to_usr(ctx, &cstr, &pstr);
+ if (res < 0)
+ goto errout;
+ /* Null-terminate the name */
+ if (res <= plen)
+ paddr[res] = '\0';
+ nd_set_link(nd, paddr);
+ ext4_put_fname_crypto_ctx(&ctx);
+ if (cpage) {
+ kunmap(cpage);
+ page_cache_release(cpage);
+ }
+ return NULL;
+errout:
+ ext4_put_fname_crypto_ctx(&ctx);
+ if (cpage) {
+ kunmap(cpage);
+ page_cache_release(cpage);
+ }
+ kfree(paddr);
+ return ERR_PTR(res);
+}
+
+static void ext4_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *cookie)
+{
+ struct page *page = cookie;
+
+ if (!page) {
+ kfree(nd_get_link(nd));
+ } else {
+ kunmap(page);
+ page_cache_release(page);
+ }
+}
+#endif
+
+static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd)
+{
struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
nd_set_link(nd, (char *) ei->i_data);
return NULL;
@@ -32,8 +120,13 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
const struct inode_operations ext4_symlink_inode_operations = {
.readlink = generic_readlink,
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+ .follow_link = ext4_follow_link,
+ .put_link = ext4_put_link,
+#else
.follow_link = page_follow_link_light,
.put_link = page_put_link,
+#endif
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
@@ -43,7 +136,7 @@ const struct inode_operations ext4_symlink_inode_operations = {
const struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = ext4_follow_link,
+ .follow_link = ext4_follow_fast_link,
.setattr = ext4_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1e09fc77395c..759842ff8af0 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -55,7 +55,6 @@
#include <linux/slab.h>
#include <linux/mbcache.h>
#include <linux/quotaops.h>
-#include <linux/rwsem.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
@@ -639,8 +638,7 @@ ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
free += EXT4_XATTR_LEN(name_len);
}
if (i->value) {
- if (free < EXT4_XATTR_SIZE(i->value_len) ||
- free < EXT4_XATTR_LEN(name_len) +
+ if (free < EXT4_XATTR_LEN(name_len) +
EXT4_XATTR_SIZE(i->value_len))
return -ENOSPC;
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 29bedf5589f6..ddc0957760ba 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -23,6 +23,7 @@
#define EXT4_XATTR_INDEX_SECURITY 6
#define EXT4_XATTR_INDEX_SYSTEM 7
#define EXT4_XATTR_INDEX_RICHACL 8
+#define EXT4_XATTR_INDEX_ENCRYPTION 9
struct ext4_xattr_header {
__le32 h_magic; /* magic number for identification */
@@ -98,6 +99,8 @@ extern const struct xattr_handler ext4_xattr_user_handler;
extern const struct xattr_handler ext4_xattr_trusted_handler;
extern const struct xattr_handler ext4_xattr_security_handler;
+#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT "c"
+
extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 94e2d2ffabe1..05f0f663f14c 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -1,5 +1,5 @@
config F2FS_FS
- tristate "F2FS filesystem support (EXPERIMENTAL)"
+ tristate "F2FS filesystem support"
depends on BLOCK
help
F2FS is based on Log-structured File System (LFS), which supports
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 742202779bd5..4320ffab3495 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -351,13 +351,11 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode,
*acl = f2fs_acl_clone(p, GFP_NOFS);
if (!*acl)
- return -ENOMEM;
+ goto no_mem;
ret = f2fs_acl_create_masq(*acl, mode);
- if (ret < 0) {
- posix_acl_release(*acl);
- return -ENOMEM;
- }
+ if (ret < 0)
+ goto no_mem_clone;
if (ret == 0) {
posix_acl_release(*acl);
@@ -378,6 +376,12 @@ no_acl:
*default_acl = NULL;
*acl = NULL;
return 0;
+
+no_mem_clone:
+ posix_acl_release(*acl);
+no_mem:
+ posix_acl_release(p);
+ return -ENOMEM;
}
int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 7f794b72b3b7..a5e17a2a0781 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -276,7 +276,7 @@ continue_unlock:
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- if (f2fs_write_meta_page(page, &wbc)) {
+ if (mapping->a_ops->writepage(page, &wbc)) {
unlock_page(page);
break;
}
@@ -464,20 +464,19 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
void recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
- block_t start_blk, orphan_blkaddr, i, j;
+ block_t start_blk, orphan_blocks, i, j;
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
return;
set_sbi_flag(sbi, SBI_POR_DOING);
- start_blk = __start_cp_addr(sbi) + 1 +
- le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
- orphan_blkaddr = __start_sum_addr(sbi) - 1;
+ start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
+ orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
- ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
+ ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
- for (i = 0; i < orphan_blkaddr; i++) {
+ for (i = 0; i < orphan_blocks; i++) {
struct page *page = get_meta_page(sbi, start_blk + i);
struct f2fs_orphan_block *orphan_blk;
@@ -615,7 +614,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned long blk_size = sbi->blocksize;
unsigned long long cp1_version = 0, cp2_version = 0;
unsigned long long cp_start_blk_no;
- unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+ unsigned int cp_blks = 1 + __cp_payload(sbi);
block_t cp_blk_no;
int i;
@@ -796,6 +795,7 @@ retry:
* wribacking dentry pages in the freeing inode.
*/
f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ cond_resched();
}
goto retry;
}
@@ -884,7 +884,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0;
void *kaddr;
int i;
- int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+ int cp_payload_blks = __cp_payload(sbi);
/*
* This avoids to conduct wrong roll-forward operations and uses
@@ -1048,17 +1048,18 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_ver;
- trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
-
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
- cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT)
+ (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC))
goto out;
if (unlikely(f2fs_cp_error(sbi)))
goto out;
if (f2fs_readonly(sbi->sb))
goto out;
+
+ trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
+
if (block_operations(sbi))
goto out;
@@ -1085,6 +1086,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
+
+ if (cpc->reason == CP_RECOVERY)
+ f2fs_msg(sbi->sb, KERN_NOTICE,
+ "checkpoint: version = %llx", ckpt_ver);
out:
mutex_unlock(&sbi->cp_mutex);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
@@ -1103,14 +1108,9 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
im->ino_num = 0;
}
- /*
- * considering 512 blocks in a segment 8 blocks are needed for cp
- * and log segment summaries. Remaining blocks are used to keep
- * orphan entries with the limitation one reserved segment
- * for cp pack we can have max 1020*504 orphan entries
- */
sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
- NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
+ NR_CURSEG_TYPE - __cp_payload(sbi)) *
+ F2FS_ORPHANS_PER_BLOCK;
}
int __init create_checkpoint_caches(void)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 985ed023a750..b91b0e10678e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,12 +12,12 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
-#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
+#include <linux/uio.h>
#include "f2fs.h"
#include "node.h"
@@ -25,6 +25,9 @@
#include "trace.h"
#include <trace/events/f2fs.h>
+static struct kmem_cache *extent_tree_slab;
+static struct kmem_cache *extent_node_slab;
+
static void f2fs_read_end_io(struct bio *bio, int err)
{
struct bio_vec *bvec;
@@ -197,7 +200,7 @@ alloc_new:
* ->node_page
* update block addresses in the node page
*/
-static void __set_data_blkaddr(struct dnode_of_data *dn)
+void set_data_blkaddr(struct dnode_of_data *dn)
{
struct f2fs_node *rn;
__le32 *addr_array;
@@ -226,7 +229,7 @@ int reserve_new_block(struct dnode_of_data *dn)
trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
dn->data_blkaddr = NEW_ADDR;
- __set_data_blkaddr(dn);
+ set_data_blkaddr(dn);
mark_inode_dirty(dn->inode);
sync_inode_page(dn);
return 0;
@@ -248,73 +251,62 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
return err;
}
-static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
- struct buffer_head *bh_result)
+static void f2fs_map_bh(struct super_block *sb, pgoff_t pgofs,
+ struct extent_info *ei, struct buffer_head *bh_result)
+{
+ unsigned int blkbits = sb->s_blocksize_bits;
+ size_t max_size = bh_result->b_size;
+ size_t mapped_size;
+
+ clear_buffer_new(bh_result);
+ map_bh(bh_result, sb, ei->blk + pgofs - ei->fofs);
+ mapped_size = (ei->fofs + ei->len - pgofs) << blkbits;
+ bh_result->b_size = min(max_size, mapped_size);
+}
+
+static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr;
- if (is_inode_flag_set(fi, FI_NO_EXTENT))
- return 0;
-
- read_lock(&fi->ext.ext_lock);
+ read_lock(&fi->ext_lock);
if (fi->ext.len == 0) {
- read_unlock(&fi->ext.ext_lock);
- return 0;
+ read_unlock(&fi->ext_lock);
+ return false;
}
stat_inc_total_hit(inode->i_sb);
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
- start_blkaddr = fi->ext.blk_addr;
+ start_blkaddr = fi->ext.blk;
if (pgofs >= start_fofs && pgofs <= end_fofs) {
- unsigned int blkbits = inode->i_sb->s_blocksize_bits;
- size_t count;
-
- set_buffer_new(bh_result);
- map_bh(bh_result, inode->i_sb,
- start_blkaddr + pgofs - start_fofs);
- count = end_fofs - pgofs + 1;
- if (count < (UINT_MAX >> blkbits))
- bh_result->b_size = (count << blkbits);
- else
- bh_result->b_size = UINT_MAX;
-
+ *ei = fi->ext;
stat_inc_read_hit(inode->i_sb);
- read_unlock(&fi->ext.ext_lock);
- return 1;
+ read_unlock(&fi->ext_lock);
+ return true;
}
- read_unlock(&fi->ext.ext_lock);
- return 0;
+ read_unlock(&fi->ext_lock);
+ return false;
}
-void update_extent_cache(struct dnode_of_data *dn)
+static bool update_extent_info(struct inode *inode, pgoff_t fofs,
+ block_t blkaddr)
{
- struct f2fs_inode_info *fi = F2FS_I(dn->inode);
- pgoff_t fofs, start_fofs, end_fofs;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ pgoff_t start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
int need_update = true;
- f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
-
- /* Update the page address in the parent node */
- __set_data_blkaddr(dn);
-
- if (is_inode_flag_set(fi, FI_NO_EXTENT))
- return;
-
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
- dn->ofs_in_node;
-
- write_lock(&fi->ext.ext_lock);
+ write_lock(&fi->ext_lock);
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
- start_blkaddr = fi->ext.blk_addr;
- end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
+ start_blkaddr = fi->ext.blk;
+ end_blkaddr = fi->ext.blk + fi->ext.len - 1;
/* Drop and initialize the matched extent */
if (fi->ext.len == 1 && fofs == start_fofs)
@@ -322,24 +314,24 @@ void update_extent_cache(struct dnode_of_data *dn)
/* Initial extent */
if (fi->ext.len == 0) {
- if (dn->data_blkaddr != NULL_ADDR) {
+ if (blkaddr != NULL_ADDR) {
fi->ext.fofs = fofs;
- fi->ext.blk_addr = dn->data_blkaddr;
+ fi->ext.blk = blkaddr;
fi->ext.len = 1;
}
goto end_update;
}
/* Front merge */
- if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) {
+ if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
fi->ext.fofs--;
- fi->ext.blk_addr--;
+ fi->ext.blk--;
fi->ext.len++;
goto end_update;
}
/* Back merge */
- if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) {
+ if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
fi->ext.len++;
goto end_update;
}
@@ -351,8 +343,7 @@ void update_extent_cache(struct dnode_of_data *dn)
fi->ext.len = fofs - start_fofs;
} else {
fi->ext.fofs = fofs + 1;
- fi->ext.blk_addr = start_blkaddr +
- fofs - start_fofs + 1;
+ fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
fi->ext.len -= fofs - start_fofs + 1;
}
} else {
@@ -366,27 +357,583 @@ void update_extent_cache(struct dnode_of_data *dn)
need_update = true;
}
end_update:
- write_unlock(&fi->ext.ext_lock);
- if (need_update)
- sync_inode_page(dn);
+ write_unlock(&fi->ext_lock);
+ return need_update;
+}
+
+static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_info *ei,
+ struct rb_node *parent, struct rb_node **p)
+{
+ struct extent_node *en;
+
+ en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
+ if (!en)
+ return NULL;
+
+ en->ei = *ei;
+ INIT_LIST_HEAD(&en->list);
+
+ rb_link_node(&en->rb_node, parent, p);
+ rb_insert_color(&en->rb_node, &et->root);
+ et->count++;
+ atomic_inc(&sbi->total_ext_node);
+ return en;
+}
+
+static void __detach_extent_node(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_node *en)
+{
+ rb_erase(&en->rb_node, &et->root);
+ et->count--;
+ atomic_dec(&sbi->total_ext_node);
+
+ if (et->cached_en == en)
+ et->cached_en = NULL;
+}
+
+static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
+ nid_t ino)
+{
+ struct extent_tree *et;
+
+ down_read(&sbi->extent_tree_lock);
+ et = radix_tree_lookup(&sbi->extent_tree_root, ino);
+ if (!et) {
+ up_read(&sbi->extent_tree_lock);
+ return NULL;
+ }
+ atomic_inc(&et->refcount);
+ up_read(&sbi->extent_tree_lock);
+
+ return et;
+}
+
+static struct extent_tree *__grab_extent_tree(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ nid_t ino = inode->i_ino;
+
+ down_write(&sbi->extent_tree_lock);
+ et = radix_tree_lookup(&sbi->extent_tree_root, ino);
+ if (!et) {
+ et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
+ f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
+ memset(et, 0, sizeof(struct extent_tree));
+ et->ino = ino;
+ et->root = RB_ROOT;
+ et->cached_en = NULL;
+ rwlock_init(&et->lock);
+ atomic_set(&et->refcount, 0);
+ et->count = 0;
+ sbi->total_ext_tree++;
+ }
+ atomic_inc(&et->refcount);
+ up_write(&sbi->extent_tree_lock);
+
+ return et;
+}
+
+static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
+ unsigned int fofs)
+{
+ struct rb_node *node = et->root.rb_node;
+ struct extent_node *en;
+
+ if (et->cached_en) {
+ struct extent_info *cei = &et->cached_en->ei;
+
+ if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
+ return et->cached_en;
+ }
+
+ while (node) {
+ en = rb_entry(node, struct extent_node, rb_node);
+
+ if (fofs < en->ei.fofs) {
+ node = node->rb_left;
+ } else if (fofs >= en->ei.fofs + en->ei.len) {
+ node = node->rb_right;
+ } else {
+ et->cached_en = en;
+ return en;
+ }
+ }
+ return NULL;
+}
+
+static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_node *en)
+{
+ struct extent_node *prev;
+ struct rb_node *node;
+
+ node = rb_prev(&en->rb_node);
+ if (!node)
+ return NULL;
+
+ prev = rb_entry(node, struct extent_node, rb_node);
+ if (__is_back_mergeable(&en->ei, &prev->ei)) {
+ en->ei.fofs = prev->ei.fofs;
+ en->ei.blk = prev->ei.blk;
+ en->ei.len += prev->ei.len;
+ __detach_extent_node(sbi, et, prev);
+ return prev;
+ }
+ return NULL;
+}
+
+static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_node *en)
+{
+ struct extent_node *next;
+ struct rb_node *node;
+
+ node = rb_next(&en->rb_node);
+ if (!node)
+ return NULL;
+
+ next = rb_entry(node, struct extent_node, rb_node);
+ if (__is_front_mergeable(&en->ei, &next->ei)) {
+ en->ei.len += next->ei.len;
+ __detach_extent_node(sbi, et, next);
+ return next;
+ }
+ return NULL;
+}
+
+static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_info *ei,
+ struct extent_node **den)
+{
+ struct rb_node **p = &et->root.rb_node;
+ struct rb_node *parent = NULL;
+ struct extent_node *en;
+
+ while (*p) {
+ parent = *p;
+ en = rb_entry(parent, struct extent_node, rb_node);
+
+ if (ei->fofs < en->ei.fofs) {
+ if (__is_front_mergeable(ei, &en->ei)) {
+ f2fs_bug_on(sbi, !den);
+ en->ei.fofs = ei->fofs;
+ en->ei.blk = ei->blk;
+ en->ei.len += ei->len;
+ *den = __try_back_merge(sbi, et, en);
+ return en;
+ }
+ p = &(*p)->rb_left;
+ } else if (ei->fofs >= en->ei.fofs + en->ei.len) {
+ if (__is_back_mergeable(ei, &en->ei)) {
+ f2fs_bug_on(sbi, !den);
+ en->ei.len += ei->len;
+ *den = __try_front_merge(sbi, et, en);
+ return en;
+ }
+ p = &(*p)->rb_right;
+ } else {
+ f2fs_bug_on(sbi, 1);
+ }
+ }
+
+ return __attach_extent_node(sbi, et, ei, parent, p);
+}
+
+static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, bool free_all)
+{
+ struct rb_node *node, *next;
+ struct extent_node *en;
+ unsigned int count = et->count;
+
+ node = rb_first(&et->root);
+ while (node) {
+ next = rb_next(node);
+ en = rb_entry(node, struct extent_node, rb_node);
+
+ if (free_all) {
+ spin_lock(&sbi->extent_lock);
+ if (!list_empty(&en->list))
+ list_del_init(&en->list);
+ spin_unlock(&sbi->extent_lock);
+ }
+
+ if (free_all || list_empty(&en->list)) {
+ __detach_extent_node(sbi, et, en);
+ kmem_cache_free(extent_node_slab, en);
+ }
+ node = next;
+ }
+
+ return count - et->count;
+}
+
+static void f2fs_init_extent_tree(struct inode *inode,
+ struct f2fs_extent *i_ext)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ struct extent_node *en;
+ struct extent_info ei;
+
+ if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
+ return;
+
+ et = __grab_extent_tree(inode);
+
+ write_lock(&et->lock);
+ if (et->count)
+ goto out;
+
+ set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
+ le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
+
+ en = __insert_extent_tree(sbi, et, &ei, NULL);
+ if (en) {
+ et->cached_en = en;
+
+ spin_lock(&sbi->extent_lock);
+ list_add_tail(&en->list, &sbi->extent_list);
+ spin_unlock(&sbi->extent_lock);
+ }
+out:
+ write_unlock(&et->lock);
+ atomic_dec(&et->refcount);
+}
+
+static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ struct extent_node *en;
+
+ trace_f2fs_lookup_extent_tree_start(inode, pgofs);
+
+ et = __find_extent_tree(sbi, inode->i_ino);
+ if (!et)
+ return false;
+
+ read_lock(&et->lock);
+ en = __lookup_extent_tree(et, pgofs);
+ if (en) {
+ *ei = en->ei;
+ spin_lock(&sbi->extent_lock);
+ if (!list_empty(&en->list))
+ list_move_tail(&en->list, &sbi->extent_list);
+ spin_unlock(&sbi->extent_lock);
+ stat_inc_read_hit(sbi->sb);
+ }
+ stat_inc_total_hit(sbi->sb);
+ read_unlock(&et->lock);
+
+ trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
+
+ atomic_dec(&et->refcount);
+ return en ? true : false;
+}
+
+static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
+ block_t blkaddr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
+ struct extent_node *den = NULL;
+ struct extent_info ei, dei;
+ unsigned int endofs;
+
+ trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
+
+ et = __grab_extent_tree(inode);
+
+ write_lock(&et->lock);
+
+ /* 1. lookup and remove existing extent info in cache */
+ en = __lookup_extent_tree(et, fofs);
+ if (!en)
+ goto update_extent;
+
+ dei = en->ei;
+ __detach_extent_node(sbi, et, en);
+
+ /* 2. if extent can be split more, split and insert the left part */
+ if (dei.len > 1) {
+ /* insert left part of split extent into cache */
+ if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
+ set_extent_info(&ei, dei.fofs, dei.blk,
+ fofs - dei.fofs);
+ en1 = __insert_extent_tree(sbi, et, &ei, NULL);
+ }
+
+ /* insert right part of split extent into cache */
+ endofs = dei.fofs + dei.len - 1;
+ if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
+ set_extent_info(&ei, fofs + 1,
+ fofs - dei.fofs + dei.blk, endofs - fofs);
+ en2 = __insert_extent_tree(sbi, et, &ei, NULL);
+ }
+ }
+
+update_extent:
+ /* 3. update extent in extent cache */
+ if (blkaddr) {
+ set_extent_info(&ei, fofs, blkaddr, 1);
+ en3 = __insert_extent_tree(sbi, et, &ei, &den);
+ }
+
+ /* 4. update in global extent list */
+ spin_lock(&sbi->extent_lock);
+ if (en && !list_empty(&en->list))
+ list_del(&en->list);
+ /*
+ * en1 and en2 split from en, they will become more and more smaller
+ * fragments after splitting several times. So if the length is smaller
+ * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
+ */
+ if (en1)
+ list_add_tail(&en1->list, &sbi->extent_list);
+ if (en2)
+ list_add_tail(&en2->list, &sbi->extent_list);
+ if (en3) {
+ if (list_empty(&en3->list))
+ list_add_tail(&en3->list, &sbi->extent_list);
+ else
+ list_move_tail(&en3->list, &sbi->extent_list);
+ }
+ if (den && !list_empty(&den->list))
+ list_del(&den->list);
+ spin_unlock(&sbi->extent_lock);
+
+ /* 5. release extent node */
+ if (en)
+ kmem_cache_free(extent_node_slab, en);
+ if (den)
+ kmem_cache_free(extent_node_slab, den);
+
+ write_unlock(&et->lock);
+ atomic_dec(&et->refcount);
+}
+
+void f2fs_preserve_extent_tree(struct inode *inode)
+{
+ struct extent_tree *et;
+ struct extent_info *ext = &F2FS_I(inode)->ext;
+ bool sync = false;
+
+ if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
+ return;
+
+ et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
+ if (!et) {
+ if (ext->len) {
+ ext->len = 0;
+ update_inode_page(inode);
+ }
+ return;
+ }
+
+ read_lock(&et->lock);
+ if (et->count) {
+ struct extent_node *en;
+
+ if (et->cached_en) {
+ en = et->cached_en;
+ } else {
+ struct rb_node *node = rb_first(&et->root);
+
+ if (!node)
+ node = rb_last(&et->root);
+ en = rb_entry(node, struct extent_node, rb_node);
+ }
+
+ if (__is_extent_same(ext, &en->ei))
+ goto out;
+
+ *ext = en->ei;
+ sync = true;
+ } else if (ext->len) {
+ ext->len = 0;
+ sync = true;
+ }
+out:
+ read_unlock(&et->lock);
+ atomic_dec(&et->refcount);
+
+ if (sync)
+ update_inode_page(inode);
+}
+
+void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+ struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
+ struct extent_node *en, *tmp;
+ unsigned long ino = F2FS_ROOT_INO(sbi);
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned int found;
+ unsigned int node_cnt = 0, tree_cnt = 0;
+
+ if (!test_opt(sbi, EXTENT_CACHE))
+ return;
+
+ if (available_free_memory(sbi, EXTENT_CACHE))
+ return;
+
+ spin_lock(&sbi->extent_lock);
+ list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
+ if (!nr_shrink--)
+ break;
+ list_del_init(&en->list);
+ }
+ spin_unlock(&sbi->extent_lock);
+
+ down_read(&sbi->extent_tree_lock);
+ while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
+ (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
+ unsigned i;
+
+ ino = treevec[found - 1]->ino + 1;
+ for (i = 0; i < found; i++) {
+ struct extent_tree *et = treevec[i];
+
+ atomic_inc(&et->refcount);
+ write_lock(&et->lock);
+ node_cnt += __free_extent_tree(sbi, et, false);
+ write_unlock(&et->lock);
+ atomic_dec(&et->refcount);
+ }
+ }
+ up_read(&sbi->extent_tree_lock);
+
+ down_write(&sbi->extent_tree_lock);
+ radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
+ F2FS_ROOT_INO(sbi)) {
+ struct extent_tree *et = (struct extent_tree *)*slot;
+
+ if (!atomic_read(&et->refcount) && !et->count) {
+ radix_tree_delete(&sbi->extent_tree_root, et->ino);
+ kmem_cache_free(extent_tree_slab, et);
+ sbi->total_ext_tree--;
+ tree_cnt++;
+ }
+ }
+ up_write(&sbi->extent_tree_lock);
+
+ trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
+}
+
+void f2fs_destroy_extent_tree(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ unsigned int node_cnt = 0;
+
+ if (!test_opt(sbi, EXTENT_CACHE))
+ return;
+
+ et = __find_extent_tree(sbi, inode->i_ino);
+ if (!et)
+ goto out;
+
+ /* free all extent info belong to this extent tree */
+ write_lock(&et->lock);
+ node_cnt = __free_extent_tree(sbi, et, true);
+ write_unlock(&et->lock);
+
+ atomic_dec(&et->refcount);
+
+ /* try to find and delete extent tree entry in radix tree */
+ down_write(&sbi->extent_tree_lock);
+ et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
+ if (!et) {
+ up_write(&sbi->extent_tree_lock);
+ goto out;
+ }
+ f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
+ radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
+ kmem_cache_free(extent_tree_slab, et);
+ sbi->total_ext_tree--;
+ up_write(&sbi->extent_tree_lock);
+out:
+ trace_f2fs_destroy_extent_tree(inode, node_cnt);
return;
}
+void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
+{
+ if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
+ f2fs_init_extent_tree(inode, i_ext);
+
+ write_lock(&F2FS_I(inode)->ext_lock);
+ get_extent_info(&F2FS_I(inode)->ext, *i_ext);
+ write_unlock(&F2FS_I(inode)->ext_lock);
+}
+
+static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei)
+{
+ if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
+ return false;
+
+ if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
+ return f2fs_lookup_extent_tree(inode, pgofs, ei);
+
+ return lookup_extent_info(inode, pgofs, ei);
+}
+
+void f2fs_update_extent_cache(struct dnode_of_data *dn)
+{
+ struct f2fs_inode_info *fi = F2FS_I(dn->inode);
+ pgoff_t fofs;
+
+ f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
+
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return;
+
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+ dn->ofs_in_node;
+
+ if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
+ return f2fs_update_extent_tree(dn->inode, fofs,
+ dn->data_blkaddr);
+
+ if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
+ sync_inode_page(dn);
+}
+
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
{
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
+ struct extent_info ei;
int err;
struct f2fs_io_info fio = {
.type = DATA,
.rw = sync ? READ_SYNC : READA,
};
+ /*
+ * If sync is false, it needs to check its block allocation.
+ * This is need and triggered by two flows:
+ * gc and truncate_partial_data_page.
+ */
+ if (!sync)
+ goto search;
+
page = find_get_page(mapping, index);
if (page && PageUptodate(page))
return page;
f2fs_put_page(page, 0);
+search:
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ goto got_it;
+ }
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
@@ -401,6 +948,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
if (unlikely(dn.data_blkaddr == NEW_ADDR))
return ERR_PTR(-EINVAL);
+got_it:
page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
@@ -435,6 +983,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
+ struct extent_info ei;
int err;
struct f2fs_io_info fio = {
.type = DATA,
@@ -445,6 +994,11 @@ repeat:
if (!page)
return ERR_PTR(-ENOMEM);
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ goto got_it;
+ }
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err) {
@@ -458,6 +1012,7 @@ repeat:
return ERR_PTR(-ENOENT);
}
+got_it:
if (PageUptodate(page))
return page;
@@ -569,19 +1124,26 @@ static int __allocate_data_block(struct dnode_of_data *dn)
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM;
+
+ dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
+ if (dn->data_blkaddr == NEW_ADDR)
+ goto alloc;
+
if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
return -ENOSPC;
+alloc:
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
seg = CURSEG_DIRECT_IO;
- allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg);
+ allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
+ &sum, seg);
/* direct IO doesn't use extent cache to maximize the performance */
- __set_data_blkaddr(dn);
+ set_data_blkaddr(dn);
/* update i_size */
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@@ -615,7 +1177,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
while (dn.ofs_in_node < end_offset && len) {
- if (dn.data_blkaddr == NULL_ADDR) {
+ block_t blkaddr;
+
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
if (__allocate_data_block(&dn))
goto sync_out;
allocated = true;
@@ -659,13 +1224,16 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
pgoff_t pgofs, end_offset;
int err = 0, ofs = 1;
+ struct extent_info ei;
bool allocated = false;
/* Get the page offset from the block offset(iblock) */
pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
- if (check_extent_cache(inode, pgofs, bh_result))
+ if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+ f2fs_map_bh(inode->i_sb, pgofs, &ei, bh_result);
goto out;
+ }
if (create)
f2fs_lock_op(F2FS_I_SB(inode));
@@ -682,7 +1250,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
goto put_out;
if (dn.data_blkaddr != NULL_ADDR) {
- set_buffer_new(bh_result);
+ clear_buffer_new(bh_result);
map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
} else if (create) {
err = __allocate_data_block(&dn);
@@ -727,6 +1295,7 @@ get_next:
if (err)
goto sync_out;
allocated = true;
+ set_buffer_new(bh_result);
blkaddr = dn.data_blkaddr;
}
/* Give more consecutive addresses for the readahead */
@@ -813,8 +1382,10 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
fio->blk_addr = dn.data_blkaddr;
/* This page is already truncated */
- if (fio->blk_addr == NULL_ADDR)
+ if (fio->blk_addr == NULL_ADDR) {
+ ClearPageUptodate(page);
goto out_writepage;
+ }
set_page_writeback(page);
@@ -827,10 +1398,15 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
need_inplace_update(inode))) {
rewrite_data_page(page, fio);
set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
+ trace_f2fs_do_write_data_page(page, IPU);
} else {
write_data_page(page, &dn, fio);
- update_extent_cache(&dn);
+ set_data_blkaddr(&dn);
+ f2fs_update_extent_cache(&dn);
+ trace_f2fs_do_write_data_page(page, OPU);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
+ if (page->index == 0)
+ set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
}
out_writepage:
f2fs_put_dnode(&dn);
@@ -909,6 +1485,8 @@ done:
clear_cold_data(page);
out:
inode_dec_dirty_pages(inode);
+ if (err)
+ ClearPageUptodate(page);
unlock_page(page);
if (need_balance_fs)
f2fs_balance_fs(sbi);
@@ -935,7 +1513,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- bool locked = false;
int ret;
long diff;
@@ -950,15 +1527,13 @@ static int f2fs_write_data_pages(struct address_space *mapping,
available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
+ /* during POR, we don't need to trigger writepage at all. */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto skip_write;
+
diff = nr_pages_to_write(sbi, DATA, wbc);
- if (!S_ISDIR(inode->i_mode)) {
- mutex_lock(&sbi->writepages);
- locked = true;
- }
ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
- if (locked)
- mutex_unlock(&sbi->writepages);
f2fs_submit_merged_bio(sbi, DATA, WRITE);
@@ -1118,12 +1693,12 @@ static int f2fs_write_end(struct file *file,
return copied;
}
-static int check_direct_IO(struct inode *inode, int rw,
- struct iov_iter *iter, loff_t offset)
+static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
+ loff_t offset)
{
unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
- if (rw == READ)
+ if (iov_iter_rw(iter) == READ)
return 0;
if (offset & blocksize_mask)
@@ -1135,8 +1710,8 @@ static int check_direct_IO(struct inode *inode, int rw,
return 0;
}
-static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -1151,19 +1726,19 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
return err;
}
- if (check_direct_IO(inode, rw, iter, offset))
+ if (check_direct_IO(inode, iter, offset))
return 0;
- trace_f2fs_direct_IO_enter(inode, offset, count, rw);
+ trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
- if (rw & WRITE)
+ if (iov_iter_rw(iter) == WRITE)
__allocate_data_blocks(inode, offset, count);
- err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
- if (err < 0 && (rw & WRITE))
+ err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block);
+ if (err < 0 && iov_iter_rw(iter) == WRITE)
f2fs_write_failed(mapping, offset + count);
- trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
+ trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
return err;
}
@@ -1236,6 +1811,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
return generic_block_bmap(mapping, block, get_data_block);
}
+void init_extent_cache_info(struct f2fs_sb_info *sbi)
+{
+ INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
+ init_rwsem(&sbi->extent_tree_lock);
+ INIT_LIST_HEAD(&sbi->extent_list);
+ spin_lock_init(&sbi->extent_lock);
+ sbi->total_ext_tree = 0;
+ atomic_set(&sbi->total_ext_node, 0);
+}
+
+int __init create_extent_cache(void)
+{
+ extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
+ sizeof(struct extent_tree));
+ if (!extent_tree_slab)
+ return -ENOMEM;
+ extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
+ sizeof(struct extent_node));
+ if (!extent_node_slab) {
+ kmem_cache_destroy(extent_tree_slab);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void destroy_extent_cache(void)
+{
+ kmem_cache_destroy(extent_node_slab);
+ kmem_cache_destroy(extent_tree_slab);
+}
+
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
.readpages = f2fs_read_data_pages,
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index e671373cc8ab..f5388f37217e 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -35,6 +35,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
/* validation check of the segment numbers */
si->hit_ext = sbi->read_hit_ext;
si->total_ext = sbi->total_hit_ext;
+ si->ext_tree = sbi->total_ext_tree;
+ si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_dirs = sbi->n_dirty_dirs;
@@ -185,6 +187,9 @@ get_cache:
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
for (i = 0; i <= UPDATE_INO; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
+ si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
+ si->cache_mem += atomic_read(&sbi->total_ext_node) *
+ sizeof(struct extent_node);
si->page_mem = 0;
npages = NODE_MAPPING(sbi)->nrpages;
@@ -260,13 +265,20 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "CP calls: %d\n", si->cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
- seq_printf(s, " - data segments : %d\n", si->data_segs);
- seq_printf(s, " - node segments : %d\n", si->node_segs);
- seq_printf(s, "Try to move %d blocks\n", si->tot_blks);
- seq_printf(s, " - data blocks : %d\n", si->data_blks);
- seq_printf(s, " - node blocks : %d\n", si->node_blks);
+ seq_printf(s, " - data segments : %d (%d)\n",
+ si->data_segs, si->bg_data_segs);
+ seq_printf(s, " - node segments : %d (%d)\n",
+ si->node_segs, si->bg_node_segs);
+ seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
+ si->bg_data_blks + si->bg_node_blks);
+ seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks,
+ si->bg_data_blks);
+ seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
+ si->bg_node_blks);
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
si->hit_ext, si->total_ext);
+ seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree);
+ seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b74097a7f6d9..3a3302ab7871 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -59,9 +59,8 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK,
};
-void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
+void set_de_type(struct f2fs_dir_entry *de, umode_t mode)
{
- umode_t mode = inode->i_mode;
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
@@ -127,22 +126,19 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
*max_slots = 0;
while (bit_pos < d->max) {
if (!test_bit_le(bit_pos, d->bitmap)) {
- if (bit_pos == 0)
- max_len = 1;
- else if (!test_bit_le(bit_pos - 1, d->bitmap))
- max_len++;
bit_pos++;
+ max_len++;
continue;
}
+
de = &d->dentry[bit_pos];
if (early_match_name(name->len, namehash, de) &&
!memcmp(d->filename[bit_pos], name->name, name->len))
goto found;
- if (max_slots && *max_slots >= 0 && max_len > *max_slots) {
+ if (max_slots && max_len > *max_slots)
*max_slots = max_len;
- max_len = 0;
- }
+ max_len = 0;
/* remain bug on condition */
if (unlikely(!de->name_len))
@@ -219,14 +215,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
unsigned int max_depth;
unsigned int level;
+ *res_page = NULL;
+
if (f2fs_has_inline_dentry(dir))
return find_in_inline_dir(dir, child, res_page);
if (npages == 0)
return NULL;
- *res_page = NULL;
-
name_hash = f2fs_dentry_hash(child);
max_depth = F2FS_I(dir)->i_current_depth;
@@ -285,7 +281,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
lock_page(page);
f2fs_wait_on_page_writeback(page, type);
de->ino = cpu_to_le32(inode->i_ino);
- set_de_type(de, inode);
+ set_de_type(de, inode->i_mode);
f2fs_dentry_kunmap(dir, page);
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
@@ -331,14 +327,14 @@ void do_make_empty_dir(struct inode *inode, struct inode *parent,
de->hash_code = 0;
de->ino = cpu_to_le32(inode->i_ino);
memcpy(d->filename[0], ".", 1);
- set_de_type(de, inode);
+ set_de_type(de, inode->i_mode);
de = &d->dentry[1];
de->hash_code = 0;
de->name_len = cpu_to_le16(2);
de->ino = cpu_to_le32(parent->i_ino);
memcpy(d->filename[1], "..", 2);
- set_de_type(de, inode);
+ set_de_type(de, parent->i_mode);
test_and_set_bit_le(0, (void *)d->bitmap);
test_and_set_bit_le(1, (void *)d->bitmap);
@@ -435,7 +431,7 @@ error:
void update_parent_metadata(struct inode *dir, struct inode *inode,
unsigned int current_depth)
{
- if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
+ if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
if (S_ISDIR(inode->i_mode)) {
inc_nlink(dir);
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
@@ -450,7 +446,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
- if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
+ if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK))
clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
}
@@ -474,30 +470,47 @@ next:
goto next;
}
+void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
+ const struct qstr *name, f2fs_hash_t name_hash,
+ unsigned int bit_pos)
+{
+ struct f2fs_dir_entry *de;
+ int slots = GET_DENTRY_SLOTS(name->len);
+ int i;
+
+ de = &d->dentry[bit_pos];
+ de->hash_code = name_hash;
+ de->name_len = cpu_to_le16(name->len);
+ memcpy(d->filename[bit_pos], name->name, name->len);
+ de->ino = cpu_to_le32(ino);
+ set_de_type(de, mode);
+ for (i = 0; i < slots; i++)
+ test_and_set_bit_le(bit_pos + i, (void *)d->bitmap);
+}
+
/*
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
int __f2fs_add_link(struct inode *dir, const struct qstr *name,
- struct inode *inode)
+ struct inode *inode, nid_t ino, umode_t mode)
{
unsigned int bit_pos;
unsigned int level;
unsigned int current_depth;
unsigned long bidx, block;
f2fs_hash_t dentry_hash;
- struct f2fs_dir_entry *de;
unsigned int nbucket, nblock;
size_t namelen = name->len;
struct page *dentry_page = NULL;
struct f2fs_dentry_block *dentry_blk = NULL;
+ struct f2fs_dentry_ptr d;
int slots = GET_DENTRY_SLOTS(namelen);
- struct page *page;
+ struct page *page = NULL;
int err = 0;
- int i;
if (f2fs_has_inline_dentry(dir)) {
- err = f2fs_add_inline_entry(dir, name, inode);
+ err = f2fs_add_inline_entry(dir, name, inode, ino, mode);
if (!err || err != -EAGAIN)
return err;
else
@@ -547,30 +560,31 @@ start:
add_dentry:
f2fs_wait_on_page_writeback(dentry_page, DATA);
- down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, name, NULL);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto fail;
+ if (inode) {
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, name, NULL);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
}
- de = &dentry_blk->dentry[bit_pos];
- de->hash_code = dentry_hash;
- de->name_len = cpu_to_le16(namelen);
- memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
- de->ino = cpu_to_le32(inode->i_ino);
- set_de_type(de, inode);
- for (i = 0; i < slots; i++)
- test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+
+ make_dentry_ptr(&d, (void *)dentry_blk, 1);
+ f2fs_update_dentry(ino, mode, &d, name, dentry_hash, bit_pos);
+
set_page_dirty(dentry_page);
- /* we don't need to mark_inode_dirty now */
- F2FS_I(inode)->i_pino = dir->i_ino;
- update_inode(inode, page);
- f2fs_put_page(page, 1);
+ if (inode) {
+ /* we don't need to mark_inode_dirty now */
+ F2FS_I(inode)->i_pino = dir->i_ino;
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+ }
update_parent_metadata(dir, inode, current_depth);
fail:
- up_write(&F2FS_I(inode)->i_sem);
+ if (inode)
+ up_write(&F2FS_I(inode)->i_sem);
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
update_inode_page(dir);
@@ -669,6 +683,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (bit_pos == NR_DENTRY_IN_BLOCK) {
truncate_hole(dir, page->index, page->index + 1);
clear_page_dirty_for_io(page);
+ ClearPagePrivate(page);
ClearPageUptodate(page);
inode_dec_dirty_pages(dir);
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7fa3313ab0e2..c06a25e5cec3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -50,6 +50,7 @@
#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
#define F2FS_MOUNT_NOBARRIER 0x00000800
#define F2FS_MOUNT_FASTBOOT 0x00001000
+#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -102,6 +103,7 @@ enum {
CP_UMOUNT,
CP_FASTBOOT,
CP_SYNC,
+ CP_RECOVERY,
CP_DISCARD,
};
@@ -216,6 +218,15 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
+/*
+ * should be same as XFS_IOC_GOINGDOWN.
+ * Flags for going down operation used by FS_IOC_GOINGDOWN
+ */
+#define F2FS_IOC_SHUTDOWN _IOR('X', 125, __u32) /* Shutdown */
+#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */
+#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
+#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
@@ -273,14 +284,34 @@ enum {
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
+/* vector size for gang look-up from extent cache that consists of radix tree */
+#define EXT_TREE_VEC_SIZE 64
+
/* for in-memory extent cache entry */
-#define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */
+#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
+
+/* number of extent info in extent cache we try to shrink */
+#define EXTENT_CACHE_SHRINK_NUMBER 128
struct extent_info {
- rwlock_t ext_lock; /* rwlock for consistency */
- unsigned int fofs; /* start offset in a file */
- u32 blk_addr; /* start block address of the extent */
- unsigned int len; /* length of the extent */
+ unsigned int fofs; /* start offset in a file */
+ u32 blk; /* start block address of the extent */
+ unsigned int len; /* length of the extent */
+};
+
+struct extent_node {
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ struct list_head list; /* node in global extent list of sbi */
+ struct extent_info ei; /* extent info */
+};
+
+struct extent_tree {
+ nid_t ino; /* inode number */
+ struct rb_root root; /* root of extent info rb-tree */
+ struct extent_node *cached_en; /* recently accessed extent node */
+ rwlock_t lock; /* protect extent info rb-tree */
+ atomic_t refcount; /* reference count of rb-tree */
+ unsigned int count; /* # of extent node in rb-tree*/
};
/*
@@ -309,6 +340,7 @@ struct f2fs_inode_info {
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
struct extent_info ext; /* in-memory extent cache entry */
+ rwlock_t ext_lock; /* rwlock for single extent cache */
struct inode_entry *dirty_dir; /* the pointer of dirty dir */
struct radix_tree_root inmem_root; /* radix tree for inmem pages */
@@ -319,21 +351,51 @@ struct f2fs_inode_info {
static inline void get_extent_info(struct extent_info *ext,
struct f2fs_extent i_ext)
{
- write_lock(&ext->ext_lock);
ext->fofs = le32_to_cpu(i_ext.fofs);
- ext->blk_addr = le32_to_cpu(i_ext.blk_addr);
+ ext->blk = le32_to_cpu(i_ext.blk);
ext->len = le32_to_cpu(i_ext.len);
- write_unlock(&ext->ext_lock);
}
static inline void set_raw_extent(struct extent_info *ext,
struct f2fs_extent *i_ext)
{
- read_lock(&ext->ext_lock);
i_ext->fofs = cpu_to_le32(ext->fofs);
- i_ext->blk_addr = cpu_to_le32(ext->blk_addr);
+ i_ext->blk = cpu_to_le32(ext->blk);
i_ext->len = cpu_to_le32(ext->len);
- read_unlock(&ext->ext_lock);
+}
+
+static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
+ u32 blk, unsigned int len)
+{
+ ei->fofs = fofs;
+ ei->blk = blk;
+ ei->len = len;
+}
+
+static inline bool __is_extent_same(struct extent_info *ei1,
+ struct extent_info *ei2)
+{
+ return (ei1->fofs == ei2->fofs && ei1->blk == ei2->blk &&
+ ei1->len == ei2->len);
+}
+
+static inline bool __is_extent_mergeable(struct extent_info *back,
+ struct extent_info *front)
+{
+ return (back->fofs + back->len == front->fofs &&
+ back->blk + back->len == front->blk);
+}
+
+static inline bool __is_back_mergeable(struct extent_info *cur,
+ struct extent_info *back)
+{
+ return __is_extent_mergeable(back, cur);
+}
+
+static inline bool __is_front_mergeable(struct extent_info *cur,
+ struct extent_info *front)
+{
+ return __is_extent_mergeable(cur, front);
}
struct f2fs_nm_info {
@@ -502,6 +564,10 @@ enum page_type {
META,
NR_PAGE_TYPE,
META_FLUSH,
+ INMEM, /* the below types are used by tracepoints only. */
+ INMEM_DROP,
+ IPU,
+ OPU,
};
struct f2fs_io_info {
@@ -559,7 +625,6 @@ struct f2fs_sb_info {
struct mutex cp_mutex; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
struct rw_semaphore node_write; /* locking node writes */
- struct mutex writepages; /* mutex for writepages() */
wait_queue_head_t cp_wait;
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
@@ -571,6 +636,14 @@ struct f2fs_sb_info {
struct list_head dir_inode_list; /* dir inode list */
spinlock_t dir_inode_lock; /* for dir inode list lock */
+ /* for extent tree cache */
+ struct radix_tree_root extent_tree_root;/* cache extent cache entries */
+ struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
+ struct list_head extent_list; /* lru list for shrinker */
+ spinlock_t extent_lock; /* locking extent lru list */
+ int total_ext_tree; /* extent tree count */
+ atomic_t total_ext_node; /* extent info count */
+
/* basic filesystem units */
unsigned int log_sectors_per_block; /* log2 sectors per block */
unsigned int log_blocksize; /* log2 block size */
@@ -920,12 +993,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
return 0;
}
+static inline block_t __cp_payload(struct f2fs_sb_info *sbi)
+{
+ return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+}
+
static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
int offset;
- if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+ if (__cp_payload(sbi) > 0) {
if (flag == NAT_BITMAP)
return &ckpt->sit_nat_version_bitmap;
else
@@ -1166,8 +1244,10 @@ enum {
FI_NEED_IPU, /* used for ipu per file */
FI_ATOMIC_FILE, /* indicate atomic file */
FI_VOLATILE_FILE, /* indicate volatile file */
+ FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
FI_DROP_CACHE, /* drop dirty page cache */
FI_DATA_EXIST, /* indicate data exists */
+ FI_INLINE_DOTS, /* indicate inline dot dentries */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1204,6 +1284,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
set_inode_flag(fi, FI_INLINE_DENTRY);
if (ri->i_inline & F2FS_DATA_EXIST)
set_inode_flag(fi, FI_DATA_EXIST);
+ if (ri->i_inline & F2FS_INLINE_DOTS)
+ set_inode_flag(fi, FI_INLINE_DOTS);
}
static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -1219,6 +1301,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
ri->i_inline |= F2FS_INLINE_DENTRY;
if (is_inode_flag_set(fi, FI_DATA_EXIST))
ri->i_inline |= F2FS_DATA_EXIST;
+ if (is_inode_flag_set(fi, FI_INLINE_DOTS))
+ ri->i_inline |= F2FS_INLINE_DOTS;
}
static inline int f2fs_has_inline_xattr(struct inode *inode)
@@ -1264,6 +1348,11 @@ static inline int f2fs_exist_data(struct inode *inode)
return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST);
}
+static inline int f2fs_has_inline_dots(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS);
+}
+
static inline bool f2fs_is_atomic_file(struct inode *inode)
{
return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
@@ -1274,6 +1363,11 @@ static inline bool f2fs_is_volatile_file(struct inode *inode)
return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE);
}
+static inline bool f2fs_is_first_block_written(struct inode *inode)
+{
+ return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
+}
+
static inline bool f2fs_is_drop_cache(struct inode *inode)
{
return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE);
@@ -1290,12 +1384,6 @@ static inline int f2fs_has_inline_dentry(struct inode *inode)
return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY);
}
-static inline void *inline_dentry_addr(struct page *page)
-{
- struct f2fs_inode *ri = F2FS_INODE(page);
- return (void *)&(ri->i_addr[1]);
-}
-
static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
{
if (!f2fs_has_inline_dentry(dir))
@@ -1363,7 +1451,7 @@ struct dentry *f2fs_get_parent(struct dentry *child);
* dir.c
*/
extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
-void set_de_type(struct f2fs_dir_entry *, struct inode *);
+void set_de_type(struct f2fs_dir_entry *, umode_t);
struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
struct f2fs_dentry_ptr *);
bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
@@ -1382,7 +1470,10 @@ ino_t f2fs_inode_by_name(struct inode *, struct qstr *);
void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
struct page *, struct inode *);
int update_dent_inode(struct inode *, const struct qstr *);
-int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
+void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *,
+ const struct qstr *, f2fs_hash_t , unsigned int);
+int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t,
+ umode_t);
void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
struct inode *);
int f2fs_do_tmpfile(struct inode *, struct inode *);
@@ -1392,7 +1483,7 @@ bool f2fs_empty_dir(struct inode *);
static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
{
return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
- inode);
+ inode, inode->i_ino, inode->i_mode);
}
/*
@@ -1519,14 +1610,22 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *, struct page *,
struct f2fs_io_info *);
void f2fs_submit_page_mbio(struct f2fs_sb_info *, struct page *,
struct f2fs_io_info *);
+void set_data_blkaddr(struct dnode_of_data *);
int reserve_new_block(struct dnode_of_data *);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
-void update_extent_cache(struct dnode_of_data *);
+void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
+void f2fs_destroy_extent_tree(struct inode *);
+void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *);
+void f2fs_update_extent_cache(struct dnode_of_data *);
+void f2fs_preserve_extent_tree(struct inode *);
struct page *find_data_page(struct inode *, pgoff_t, bool);
struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct page *, struct f2fs_io_info *);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
+void init_extent_cache_info(struct f2fs_sb_info *);
+int __init create_extent_cache(void);
+void destroy_extent_cache(void);
void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
int f2fs_release_page(struct page *, gfp_t);
@@ -1554,7 +1653,7 @@ struct f2fs_stat_info {
struct f2fs_sb_info *sbi;
int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
int main_area_segs, main_area_sections, main_area_zones;
- int hit_ext, total_ext;
+ int hit_ext, total_ext, ext_tree, ext_node;
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, dirty_nats, sits, dirty_sits, fnids;
int total_count, utilization;
@@ -1566,7 +1665,9 @@ struct f2fs_stat_info {
int dirty_count, node_pages, meta_pages;
int prefree_count, call_count, cp_count;
int tot_segs, node_segs, data_segs, free_segs, free_secs;
+ int bg_node_segs, bg_data_segs;
int tot_blks, data_blks, node_blks;
+ int bg_data_blks, bg_node_blks;
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
int curzone[NR_CURSEG_TYPE];
@@ -1615,31 +1716,36 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
((sbi)->block_count[(curseg)->alloc_type]++)
#define stat_inc_inplace_blocks(sbi) \
(atomic_inc(&(sbi)->inplace_count))
-#define stat_inc_seg_count(sbi, type) \
+#define stat_inc_seg_count(sbi, type, gc_type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
(si)->tot_segs++; \
- if (type == SUM_TYPE_DATA) \
+ if (type == SUM_TYPE_DATA) { \
si->data_segs++; \
- else \
+ si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \
+ } else { \
si->node_segs++; \
+ si->bg_node_segs += (gc_type == BG_GC) ? 1 : 0; \
+ } \
} while (0)
#define stat_inc_tot_blk_count(si, blks) \
(si->tot_blks += (blks))
-#define stat_inc_data_blk_count(sbi, blks) \
+#define stat_inc_data_blk_count(sbi, blks, gc_type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
stat_inc_tot_blk_count(si, blks); \
si->data_blks += (blks); \
+ si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \
} while (0)
-#define stat_inc_node_blk_count(sbi, blks) \
+#define stat_inc_node_blk_count(sbi, blks, gc_type) \
do { \
struct f2fs_stat_info *si = F2FS_STAT(sbi); \
stat_inc_tot_blk_count(si, blks); \
si->node_blks += (blks); \
+ si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \
} while (0)
int f2fs_build_stats(struct f2fs_sb_info *);
@@ -1661,10 +1767,10 @@ void f2fs_destroy_root_stats(void);
#define stat_inc_seg_type(sbi, curseg)
#define stat_inc_block_count(sbi, curseg)
#define stat_inc_inplace_blocks(sbi)
-#define stat_inc_seg_count(si, type)
+#define stat_inc_seg_count(sbi, type, gc_type)
#define stat_inc_tot_blk_count(si, blks)
-#define stat_inc_data_blk_count(si, blks)
-#define stat_inc_node_blk_count(sbi, blks)
+#define stat_inc_data_blk_count(sbi, blks, gc_type)
+#define stat_inc_node_blk_count(sbi, blks, gc_type)
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
@@ -1688,6 +1794,7 @@ extern struct kmem_cache *inode_entry_slab;
*/
bool f2fs_may_inline(struct inode *);
void read_inline_data(struct page *, struct page *);
+bool truncate_inline_inode(struct page *, u64);
int f2fs_read_inline_data(struct inode *, struct page *);
int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
int f2fs_convert_inline_inode(struct inode *);
@@ -1697,7 +1804,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
struct page **);
struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
-int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *);
+int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *,
+ nid_t, umode_t);
void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
struct inode *, struct inode *);
bool f2fs_empty_inline_dir(struct inode *);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 98dac27bc3f7..a6f3f6186588 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -241,6 +241,8 @@ go_write:
* will be used only for fsynced inodes after checkpoint.
*/
try_to_fix_pino(inode);
+ clear_inode_flag(fi, FI_APPEND_WRITE);
+ clear_inode_flag(fi, FI_UPDATE_WRITE);
goto out;
}
sync_nodes:
@@ -433,8 +435,12 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
continue;
dn->data_blkaddr = NULL_ADDR;
- update_extent_cache(dn);
+ set_data_blkaddr(dn);
+ f2fs_update_extent_cache(dn);
invalidate_blocks(sbi, blkaddr);
+ if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
+ clear_inode_flag(F2FS_I(dn->inode),
+ FI_FIRST_BLOCK_WRITTEN);
nr_free++;
}
if (nr_free) {
@@ -454,15 +460,16 @@ void truncate_data_blocks(struct dnode_of_data *dn)
truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
}
-static int truncate_partial_data_page(struct inode *inode, u64 from)
+static int truncate_partial_data_page(struct inode *inode, u64 from,
+ bool force)
{
unsigned offset = from & (PAGE_CACHE_SIZE - 1);
struct page *page;
- if (!offset)
+ if (!offset && !force)
return 0;
- page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false);
+ page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, force);
if (IS_ERR(page))
return 0;
@@ -473,7 +480,8 @@ static int truncate_partial_data_page(struct inode *inode, u64 from)
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, offset, PAGE_CACHE_SIZE - offset);
- set_page_dirty(page);
+ if (!force)
+ set_page_dirty(page);
out:
f2fs_put_page(page, 1);
return 0;
@@ -487,6 +495,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
pgoff_t free_from;
int count = 0, err = 0;
struct page *ipage;
+ bool truncate_page = false;
trace_f2fs_truncate_blocks_enter(inode, from);
@@ -502,7 +511,10 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
}
if (f2fs_has_inline_data(inode)) {
+ if (truncate_inline_inode(ipage, from))
+ set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
+ truncate_page = true;
goto out;
}
@@ -533,7 +545,7 @@ out:
/* lastly zero out the first data page */
if (!err)
- err = truncate_partial_data_page(inode, from);
+ err = truncate_partial_data_page(inode, from, truncate_page);
trace_f2fs_truncate_blocks_exit(inode, err);
return err;
@@ -997,6 +1009,9 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
if (!f2fs_is_volatile_file(inode))
return 0;
+ if (!f2fs_is_first_block_written(inode))
+ return truncate_partial_data_page(inode, 0, true);
+
punch_hole(inode, 0, F2FS_BLKSIZE);
return 0;
}
@@ -1029,6 +1044,41 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
return ret;
}
+static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct super_block *sb = sbi->sb;
+ __u32 in;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(in, (__u32 __user *)arg))
+ return -EFAULT;
+
+ switch (in) {
+ case F2FS_GOING_DOWN_FULLSYNC:
+ sb = freeze_bdev(sb->s_bdev);
+ if (sb && !IS_ERR(sb)) {
+ f2fs_stop_checkpoint(sbi);
+ thaw_bdev(sb->s_bdev, sb);
+ }
+ break;
+ case F2FS_GOING_DOWN_METASYNC:
+ /* do checkpoint only */
+ f2fs_sync_fs(sb, 1);
+ f2fs_stop_checkpoint(sbi);
+ break;
+ case F2FS_GOING_DOWN_NOSYNC:
+ f2fs_stop_checkpoint(sbi);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -1078,6 +1128,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_release_volatile_write(filp);
case F2FS_IOC_ABORT_VOLATILE_WRITE:
return f2fs_ioc_abort_volatile_write(filp);
+ case F2FS_IOC_SHUTDOWN:
+ return f2fs_ioc_shutdown(filp, arg);
case FITRIM:
return f2fs_ioc_fitrim(filp, arg);
default:
@@ -1104,8 +1156,6 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
const struct file_operations f2fs_file_operations = {
.llseek = f2fs_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.open = generic_file_open,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 76adbc3641f1..ed58211fe79b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -435,7 +435,7 @@ next_step:
set_page_dirty(node_page);
}
f2fs_put_page(node_page, 1);
- stat_inc_node_blk_count(sbi, 1);
+ stat_inc_node_blk_count(sbi, 1, gc_type);
}
if (initial) {
@@ -622,7 +622,7 @@ next_step:
if (IS_ERR(data_page))
continue;
move_data_page(inode, data_page, gc_type);
- stat_inc_data_blk_count(sbi, 1);
+ stat_inc_data_blk_count(sbi, 1, gc_type);
}
}
@@ -680,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
}
blk_finish_plug(&plug);
- stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)));
+ stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
stat_inc_call_count(sbi->stat_info);
f2fs_put_page(sum_page, 1);
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 1484c00133cd..8140e4f0e538 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -21,7 +21,7 @@ bool f2fs_may_inline(struct inode *inode)
if (f2fs_is_atomic_file(inode))
return false;
- if (!S_ISREG(inode->i_mode))
+ if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
return false;
if (i_size_read(inode) > MAX_INLINE_DATA)
@@ -50,10 +50,19 @@ void read_inline_data(struct page *page, struct page *ipage)
SetPageUptodate(page);
}
-static void truncate_inline_data(struct page *ipage)
+bool truncate_inline_inode(struct page *ipage, u64 from)
{
+ void *addr;
+
+ if (from >= MAX_INLINE_DATA)
+ return false;
+
+ addr = inline_data_addr(ipage);
+
f2fs_wait_on_page_writeback(ipage, NODE);
- memset(inline_data_addr(ipage), 0, MAX_INLINE_DATA);
+ memset(addr + from, 0, MAX_INLINE_DATA - from);
+
+ return true;
}
int f2fs_read_inline_data(struct inode *inode, struct page *page)
@@ -122,7 +131,8 @@ no_update:
set_page_writeback(page);
fio.blk_addr = dn->data_blkaddr;
write_data_page(page, dn, &fio);
- update_extent_cache(dn);
+ set_data_blkaddr(dn);
+ f2fs_update_extent_cache(dn);
f2fs_wait_on_page_writeback(page, DATA);
if (dirty)
inode_dec_dirty_pages(dn->inode);
@@ -131,7 +141,7 @@ no_update:
set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
- truncate_inline_data(dn->inode_page);
+ truncate_inline_inode(dn->inode_page, 0);
clear_out:
stat_dec_inline_inode(dn->inode);
f2fs_clear_inline_inode(dn->inode);
@@ -245,7 +255,7 @@ process_inline:
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- truncate_inline_data(ipage);
+ truncate_inline_inode(ipage, 0);
f2fs_clear_inline_inode(inode);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
@@ -363,7 +373,7 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
- truncate_inline_data(ipage);
+ truncate_inline_inode(ipage, 0);
stat_dec_inline_dir(dir);
clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
@@ -380,21 +390,18 @@ out:
}
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
- struct inode *inode)
+ struct inode *inode, nid_t ino, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos;
f2fs_hash_t name_hash;
- struct f2fs_dir_entry *de;
size_t namelen = name->len;
struct f2fs_inline_dentry *dentry_blk = NULL;
+ struct f2fs_dentry_ptr d;
int slots = GET_DENTRY_SLOTS(namelen);
- struct page *page;
+ struct page *page = NULL;
int err = 0;
- int i;
-
- name_hash = f2fs_dentry_hash(name);
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
@@ -410,32 +417,34 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
goto out;
}
- down_write(&F2FS_I(inode)->i_sem);
- page = init_inode_metadata(inode, dir, name, ipage);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto fail;
+ if (inode) {
+ down_write(&F2FS_I(inode)->i_sem);
+ page = init_inode_metadata(inode, dir, name, ipage);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto fail;
+ }
}
f2fs_wait_on_page_writeback(ipage, NODE);
- de = &dentry_blk->dentry[bit_pos];
- de->hash_code = name_hash;
- de->name_len = cpu_to_le16(namelen);
- memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
- de->ino = cpu_to_le32(inode->i_ino);
- set_de_type(de, inode);
- for (i = 0; i < slots; i++)
- test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+
+ name_hash = f2fs_dentry_hash(name);
+ make_dentry_ptr(&d, (void *)dentry_blk, 2);
+ f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
+
set_page_dirty(ipage);
/* we don't need to mark_inode_dirty now */
- F2FS_I(inode)->i_pino = dir->i_ino;
- update_inode(inode, page);
- f2fs_put_page(page, 1);
+ if (inode) {
+ F2FS_I(inode)->i_pino = dir->i_ino;
+ update_inode(inode, page);
+ f2fs_put_page(page, 1);
+ }
update_parent_metadata(dir, inode, 0);
fail:
- up_write(&F2FS_I(inode)->i_sem);
+ if (inode)
+ up_write(&F2FS_I(inode)->i_sem);
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
update_inode(dir, ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2d002e3738a7..e622ec95409e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -51,6 +51,15 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
+static bool __written_first_block(struct f2fs_inode *ri)
+{
+ block_t addr = le32_to_cpu(ri->i_addr[0]);
+
+ if (addr != NEW_ADDR && addr != NULL_ADDR)
+ return true;
+ return false;
+}
+
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
{
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
@@ -130,7 +139,8 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
- get_extent_info(&fi->ext, ri->i_ext);
+ f2fs_init_extent_cache(inode, &ri->i_ext);
+
get_inline_info(fi, ri);
/* check data exist */
@@ -140,6 +150,9 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
+ if (__written_first_block(ri))
+ set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
+
f2fs_put_page(node_page, 1);
stat_inc_inline_inode(inode);
@@ -220,7 +233,11 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_links = cpu_to_le32(inode->i_nlink);
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks);
+
+ read_lock(&F2FS_I(inode)->ext_lock);
set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
+ read_unlock(&F2FS_I(inode)->ext_lock);
+
set_raw_inline(F2FS_I(inode), ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@@ -328,6 +345,12 @@ void f2fs_evict_inode(struct inode *inode)
no_delete:
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
+
+ /* update extent info in inode */
+ if (inode->i_nlink)
+ f2fs_preserve_extent_tree(inode);
+ f2fs_destroy_extent_tree(inode);
+
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e79639a9787a..407dde3d7a92 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -14,6 +14,7 @@
#include <linux/sched.h>
#include <linux/ctype.h>
#include <linux/dcache.h>
+#include <linux/namei.h>
#include "f2fs.h"
#include "node.h"
@@ -187,6 +188,44 @@ struct dentry *f2fs_get_parent(struct dentry *child)
return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
}
+static int __recover_dot_dentries(struct inode *dir, nid_t pino)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct qstr dot = QSTR_INIT(".", 1);
+ struct qstr dotdot = QSTR_INIT("..", 2);
+ struct f2fs_dir_entry *de;
+ struct page *page;
+ int err = 0;
+
+ f2fs_lock_op(sbi);
+
+ de = f2fs_find_entry(dir, &dot, &page);
+ if (de) {
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
+ } else {
+ err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR);
+ if (err)
+ goto out;
+ }
+
+ de = f2fs_find_entry(dir, &dotdot, &page);
+ if (de) {
+ f2fs_dentry_kunmap(dir, page);
+ f2fs_put_page(page, 0);
+ } else {
+ err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR);
+ }
+out:
+ if (!err) {
+ clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS);
+ mark_inode_dirty(dir);
+ }
+
+ f2fs_unlock_op(sbi);
+ return err;
+}
+
static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
@@ -206,6 +245,16 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
inode = f2fs_iget(dir->i_sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
+
+ if (f2fs_has_inline_dots(inode)) {
+ int err;
+
+ err = __recover_dot_dentries(inode, dir->i_ino);
+ if (err) {
+ iget_failed(inode);
+ return ERR_PTR(err);
+ }
+ }
}
return d_splice_alias(inode, dentry);
@@ -247,6 +296,23 @@ fail:
return err;
}
+static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct page *page;
+
+ page = page_follow_link_light(dentry, nd);
+ if (IS_ERR(page))
+ return page;
+
+ /* this is broken symlink case */
+ if (*nd_get_link(nd) == 0) {
+ kunmap(page);
+ page_cache_release(page);
+ return ERR_PTR(-ENOENT);
+ }
+ return page;
+}
+
static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
const char *symname)
{
@@ -276,6 +342,17 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);
unlock_new_inode(inode);
+ /*
+ * Let's flush symlink data in order to avoid broken symlink as much as
+ * possible. Nevertheless, fsyncing is the best way, but there is no
+ * way to get a file descriptor in order to flush that.
+ *
+ * Note that, it needs to do dir->fsync to make this recoverable.
+ * If the symlink path is stored into inline_data, there is no
+ * performance regression.
+ */
+ filemap_write_and_wait_range(inode->i_mapping, 0, symlen - 1);
+
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
return err;
@@ -693,6 +770,8 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
f2fs_unlock_op(sbi);
alloc_nid_done(sbi, inode->i_ino);
+
+ stat_inc_inline_inode(inode);
d_tmpfile(dentry, inode);
unlock_new_inode(inode);
return 0;
@@ -729,7 +808,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
const struct inode_operations f2fs_symlink_inode_operations = {
.readlink = generic_readlink,
- .follow_link = page_follow_link_light,
+ .follow_link = f2fs_follow_link,
.put_link = page_put_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 97bd9d3db882..8ab0cf1930bd 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -41,7 +41,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
/* only uses low memory */
avail_ram = val.totalram - val.totalhigh;
- /* give 25%, 25%, 50%, 50% memory for each components respectively */
+ /*
+ * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
+ */
if (type == FREE_NIDS) {
mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
PAGE_CACHE_SHIFT;
@@ -62,6 +64,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
mem_size += (sbi->im[i].ino_num *
sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+ } else if (type == EXTENT_CACHE) {
+ mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
+ atomic_read(&sbi->total_ext_node) *
+ sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else {
if (sbi->sb->s_bdi->dirty_exceeded)
return false;
@@ -494,7 +501,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
/* if inline_data is set, should not report any block indices */
if (f2fs_has_inline_data(dn->inode) && index) {
- err = -EINVAL;
+ err = -ENOENT;
f2fs_put_page(npage[0], 1);
goto release_out;
}
@@ -995,6 +1002,7 @@ static int read_node_page(struct page *page, int rw)
get_node_info(sbi, page->index, &ni);
if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ ClearPageUptodate(page);
f2fs_put_page(page, 1);
return -ENOENT;
}
@@ -1306,6 +1314,7 @@ static int f2fs_write_node_page(struct page *page,
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_NODES);
unlock_page(page);
return 0;
@@ -1821,6 +1830,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
struct page *page = NULL;
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
/*
* there are two steps to flush nat entries:
@@ -1874,7 +1884,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, set->entry_cnt);
+ down_write(&nm_i->nat_tree_lock);
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
+ up_write(&nm_i->nat_tree_lock);
kmem_cache_free(nat_entry_set_slab, set);
}
@@ -1902,6 +1914,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
+ down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, SETVEC_SIZE, setvec))) {
unsigned idx;
@@ -1910,6 +1923,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
__adjust_nat_entry_set(setvec[idx], &sets,
MAX_NAT_JENTRIES(sum));
}
+ up_write(&nm_i->nat_tree_lock);
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index f405bbf2435a..c56026f1725c 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -120,6 +120,7 @@ enum mem_type {
NAT_ENTRIES, /* indicates the cached nat entry */
DIRTY_DENTS, /* indicates dirty dentry pages */
INO_ENTRIES, /* indicates inode entries */
+ EXTENT_CACHE, /* indicates extent cache */
BASE_CHECK, /* check kernel status */
};
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 41afb9534bbd..8d8ea99f2156 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -93,10 +93,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage)
}
retry:
de = f2fs_find_entry(dir, &name, &page);
- if (de && inode->i_ino == le32_to_cpu(de->ino)) {
- clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
+ if (de && inode->i_ino == le32_to_cpu(de->ino))
goto out_unmap_put;
- }
+
if (de) {
einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
if (IS_ERR(einode)) {
@@ -115,7 +114,7 @@ retry:
iput(einode);
goto retry;
}
- err = __f2fs_add_link(dir, &name, inode);
+ err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
if (err)
goto out_err;
@@ -187,11 +186,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
- if (entry) {
- if (IS_INODE(page) && is_dent_dnode(page))
- set_inode_flag(F2FS_I(entry->inode),
- FI_INC_LINK);
- } else {
+ if (!entry) {
if (IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
@@ -212,8 +207,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
if (IS_ERR(entry->inode)) {
err = PTR_ERR(entry->inode);
kmem_cache_free(fsync_entry_slab, entry);
- if (err == -ENOENT)
+ if (err == -ENOENT) {
+ err = 0;
goto next;
+ }
break;
}
list_add_tail(&entry->list, head);
@@ -256,6 +253,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
struct f2fs_summary_block *sum_node;
struct f2fs_summary sum;
struct page *sum_page, *node_page;
+ struct dnode_of_data tdn = *dn;
nid_t ino, nid;
struct inode *inode;
unsigned int offset;
@@ -283,17 +281,15 @@ got_it:
/* Use the locked dnode page and inode */
nid = le32_to_cpu(sum.nid);
if (dn->inode->i_ino == nid) {
- struct dnode_of_data tdn = *dn;
tdn.nid = nid;
+ if (!dn->inode_page_locked)
+ lock_page(dn->inode_page);
tdn.node_page = dn->inode_page;
tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
- truncate_data_blocks_range(&tdn, 1);
- return 0;
+ goto truncate_out;
} else if (dn->nid == nid) {
- struct dnode_of_data tdn = *dn;
tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
- truncate_data_blocks_range(&tdn, 1);
- return 0;
+ goto truncate_out;
}
/* Get the node page */
@@ -317,18 +313,33 @@ got_it:
bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
le16_to_cpu(sum.ofs_in_node);
- if (ino != dn->inode->i_ino) {
- truncate_hole(inode, bidx, bidx + 1);
+ /*
+ * if inode page is locked, unlock temporarily, but its reference
+ * count keeps alive.
+ */
+ if (ino == dn->inode->i_ino && dn->inode_page_locked)
+ unlock_page(dn->inode_page);
+
+ set_new_dnode(&tdn, inode, NULL, NULL, 0);
+ if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
+ goto out;
+
+ if (tdn.data_blkaddr == blkaddr)
+ truncate_data_blocks_range(&tdn, 1);
+
+ f2fs_put_dnode(&tdn);
+out:
+ if (ino != dn->inode->i_ino)
iput(inode);
- } else {
- struct dnode_of_data tdn;
- set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
- if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
- return 0;
- if (tdn.data_blkaddr != NULL_ADDR)
- truncate_data_blocks_range(&tdn, 1);
- f2fs_put_page(tdn.node_page, 1);
- }
+ else if (dn->inode_page_locked)
+ lock_page(dn->inode_page);
+ return 0;
+
+truncate_out:
+ if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
+ truncate_data_blocks_range(&tdn, 1);
+ if (dn->inode->i_ino == nid && !dn->inode_page_locked)
+ unlock_page(dn->inode_page);
return 0;
}
@@ -384,7 +395,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
src = datablock_addr(dn.node_page, dn.ofs_in_node);
dest = datablock_addr(page, dn.ofs_in_node);
- if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
+ if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR &&
+ dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) {
+
if (src == NULL_ADDR) {
err = reserve_new_block(&dn);
/* We should not get -ENOSPC */
@@ -401,14 +414,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
/* write dummy data page */
recover_data_page(sbi, NULL, &sum, src, dest);
dn.data_blkaddr = dest;
- update_extent_cache(&dn);
+ set_data_blkaddr(&dn);
+ f2fs_update_extent_cache(&dn);
recovered++;
}
dn.ofs_in_node++;
}
- /* write node page in place */
- set_summary(&sum, dn.nid, 0, 0);
if (IS_INODE(dn.node_page))
sync_inode_page(&dn);
@@ -552,7 +564,7 @@ out:
mutex_unlock(&sbi->cp_mutex);
} else if (need_writecp) {
struct cp_control cpc = {
- .reason = CP_SYNC,
+ .reason = CP_RECOVERY,
};
mutex_unlock(&sbi->cp_mutex);
write_checkpoint(sbi, &cpc);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index daee4ab913da..f939660941bb 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -205,6 +205,8 @@ retry:
list_add_tail(&new->list, &fi->inmem_pages);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
mutex_unlock(&fi->inmem_lock);
+
+ trace_f2fs_register_inmem_page(page, INMEM);
}
void commit_inmem_pages(struct inode *inode, bool abort)
@@ -238,11 +240,13 @@ void commit_inmem_pages(struct inode *inode, bool abort)
f2fs_wait_on_page_writeback(cur->page, DATA);
if (clear_page_dirty_for_io(cur->page))
inode_dec_dirty_pages(inode);
+ trace_f2fs_commit_inmem_page(cur->page, INMEM);
do_write_data_page(cur->page, &fio);
submit_bio = true;
}
f2fs_put_page(cur->page, 1);
} else {
+ trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
put_page(cur->page);
}
radix_tree_delete(&fi->inmem_root, cur->page->index);
@@ -277,6 +281,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
+ /* try to shrink extent cache when there is no enough memory */
+ f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
+
/* check the # of cached NAT entries and prefree segments */
if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
excess_prefree_segs(sbi) ||
@@ -549,7 +556,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
- if (end - start < cpc->trim_minlen)
+ if (force && end - start < cpc->trim_minlen)
continue;
__add_discard_entry(sbi, cpc, start, end);
@@ -1164,6 +1171,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
+ mutex_lock(&sit_i->sentry_lock);
/* direct_io'ed data is aligned to the segment for better performance */
if (direct_io && curseg->next_blkoff)
@@ -1178,7 +1186,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
*/
__add_sum_entry(sbi, type, sum);
- mutex_lock(&sit_i->sentry_lock);
__refresh_next_blkoff(sbi, curseg);
stat_inc_block_count(sbi, curseg);
@@ -1730,6 +1737,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
mutex_lock(&curseg->curseg_mutex);
mutex_lock(&sit_i->sentry_lock);
+ if (!sit_i->dirty_sentries)
+ goto out;
+
/*
* add and account sit entries of dirty bitmap in sit entry
* set temporarily
@@ -1744,9 +1754,6 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
remove_sits_in_journal(sbi);
- if (!sit_i->dirty_sentries)
- goto out;
-
/*
* there are two steps to flush sit entries:
* #1, flush sit entries to journal in current cold data summary block.
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 7fd35111cf62..85d7fa7514b2 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -336,7 +336,8 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
clear_bit(segno, free_i->free_segmap);
free_i->free_segments++;
- next = find_next_bit(free_i->free_segmap, MAIN_SEGS(sbi), start_segno);
+ next = find_next_bit(free_i->free_segmap,
+ start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f2fe666a6ea9..160b88346b24 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -57,6 +57,8 @@ enum {
Opt_flush_merge,
Opt_nobarrier,
Opt_fastboot,
+ Opt_extent_cache,
+ Opt_noinline_data,
Opt_err,
};
@@ -78,6 +80,8 @@ static match_table_t f2fs_tokens = {
{Opt_flush_merge, "flush_merge"},
{Opt_nobarrier, "nobarrier"},
{Opt_fastboot, "fastboot"},
+ {Opt_extent_cache, "extent_cache"},
+ {Opt_noinline_data, "noinline_data"},
{Opt_err, NULL},
};
@@ -367,6 +371,12 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_fastboot:
set_opt(sbi, FASTBOOT);
break;
+ case Opt_extent_cache:
+ set_opt(sbi, EXTENT_CACHE);
+ break;
+ case Opt_noinline_data:
+ clear_opt(sbi, INLINE_DATA);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -392,7 +402,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
atomic_set(&fi->dirty_pages, 0);
fi->i_current_depth = 1;
fi->i_advise = 0;
- rwlock_init(&fi->ext.ext_lock);
+ rwlock_init(&fi->ext_lock);
init_rwsem(&fi->i_sem);
INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
INIT_LIST_HEAD(&fi->inmem_pages);
@@ -591,6 +601,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",disable_ext_identify");
if (test_opt(sbi, INLINE_DATA))
seq_puts(seq, ",inline_data");
+ else
+ seq_puts(seq, ",noinline_data");
if (test_opt(sbi, INLINE_DENTRY))
seq_puts(seq, ",inline_dentry");
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
@@ -599,6 +611,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",nobarrier");
if (test_opt(sbi, FASTBOOT))
seq_puts(seq, ",fastboot");
+ if (test_opt(sbi, EXTENT_CACHE))
+ seq_puts(seq, ",extent_cache");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
@@ -959,7 +973,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
struct buffer_head *raw_super_buf;
struct inode *root;
long err = -EINVAL;
- bool retry = true;
+ bool retry = true, need_fsck = false;
char *options = NULL;
int i;
@@ -984,6 +998,7 @@ try_onemore:
sbi->active_logs = NR_CURSEG_TYPE;
set_opt(sbi, BG_GC);
+ set_opt(sbi, INLINE_DATA);
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -1020,7 +1035,6 @@ try_onemore:
sbi->raw_super = raw_super;
sbi->raw_super_buf = raw_super_buf;
mutex_init(&sbi->gc_mutex);
- mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -1072,6 +1086,8 @@ try_onemore:
INIT_LIST_HEAD(&sbi->dir_inode_list);
spin_lock_init(&sbi->dir_inode_lock);
+ init_extent_cache_info(sbi);
+
init_ino_entry_info(sbi);
/* setup f2fs internal modules */
@@ -1146,9 +1162,6 @@ try_onemore:
if (err)
goto free_proc;
- if (!retry)
- set_sbi_flag(sbi, SBI_NEED_FSCK);
-
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
/*
@@ -1160,8 +1173,13 @@ try_onemore:
err = -EROFS;
goto free_kobj;
}
+
+ if (need_fsck)
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
err = recover_fsync_data(sbi);
if (err) {
+ need_fsck = true;
f2fs_msg(sb, KERN_ERR,
"Cannot recover all fsync data errno=%ld", err);
goto free_kobj;
@@ -1212,7 +1230,7 @@ free_sbi:
/* give only one another chance */
if (retry) {
- retry = 0;
+ retry = false;
shrink_dcache_sb(sb);
goto try_onemore;
}
@@ -1278,10 +1296,13 @@ static int __init init_f2fs_fs(void)
err = create_checkpoint_caches();
if (err)
goto free_segment_manager_caches;
+ err = create_extent_cache();
+ if (err)
+ goto free_checkpoint_caches;
f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj);
if (!f2fs_kset) {
err = -ENOMEM;
- goto free_checkpoint_caches;
+ goto free_extent_cache;
}
err = register_filesystem(&f2fs_fs_type);
if (err)
@@ -1292,6 +1313,8 @@ static int __init init_f2fs_fs(void)
free_kset:
kset_unregister(f2fs_kset);
+free_extent_cache:
+ destroy_extent_cache();
free_checkpoint_caches:
destroy_checkpoint_caches();
free_segment_manager_caches:
@@ -1309,6 +1332,7 @@ static void __exit exit_f2fs_fs(void)
remove_proc_entry("fs/f2fs", NULL);
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
+ destroy_extent_cache();
destroy_checkpoint_caches();
destroy_segment_manager_caches();
destroy_node_manager_caches();
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 5072bf9ae0ef..b0fd2f2d0716 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -135,7 +135,8 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
if (strcmp(name, "") != 0)
return -EINVAL;
- *((char *)buffer) = F2FS_I(inode)->i_advise;
+ if (buffer)
+ *((char *)buffer) = F2FS_I(inode)->i_advise;
return sizeof(char);
}
@@ -152,6 +153,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
+ mark_inode_dirty(inode);
return 0;
}
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 91ad9e1c9441..93fc62232ec2 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -8,9 +8,7 @@
* May 1999. AV. Fixed the bogosity with FAT32 (read "FAT28"). Fscking lusers.
*/
-#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include "fat.h"
/* this must be > 0. */
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index c5d6bb939d19..4afc4d9d2e41 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -13,13 +13,9 @@
* Short name translation 1999, 2001 by Wolfram Pienkoss <wp@bszh.de>
*/
-#include <linux/module.h>
#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/buffer_head.h>
#include <linux/compat.h>
#include <linux/uaccess.h>
-#include <linux/kernel.h>
#include "fat.h"
/*
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 64e295e8ff38..be5e15323bab 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -2,11 +2,8 @@
#define _FAT_H
#include <linux/buffer_head.h>
-#include <linux/string.h>
#include <linux/nls.h>
-#include <linux/fs.h>
#include <linux/hash.h>
-#include <linux/mutex.h>
#include <linux/ratelimit.h>
#include <linux/msdos_fs.h>
@@ -66,7 +63,7 @@ struct msdos_sb_info {
unsigned short sec_per_clus; /* sectors/cluster */
unsigned short cluster_bits; /* log2(cluster_size) */
unsigned int cluster_size; /* cluster size */
- unsigned char fats, fat_bits; /* number of FATs, FAT bits (12 or 16) */
+ unsigned char fats, fat_bits; /* number of FATs, FAT bits (12,16 or 32) */
unsigned short fat_start;
unsigned long fat_length; /* FAT start & length (sec.) */
unsigned long dir_start;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 260705c58062..8226557130a2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -3,9 +3,6 @@
* Released under GPL v2.
*/
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/msdos_fs.h>
#include <linux/blkdev.h>
#include "fat.h"
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 8429c68e3057..cf50d93565a2 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -10,10 +10,6 @@
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/mount.h>
-#include <linux/time.h>
-#include <linux/buffer_head.h>
-#include <linux/writeback.h>
-#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
@@ -170,8 +166,6 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
const struct file_operations fat_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 497c7c5263c7..c06774658345 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -11,21 +11,12 @@
*/
#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/time.h>
-#include <linux/slab.h>
-#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/mpage.h>
-#include <linux/buffer_head.h>
-#include <linux/mount.h>
-#include <linux/aio.h>
#include <linux/vfs.h>
+#include <linux/seq_file.h>
#include <linux/parser.h>
#include <linux/uio.h>
-#include <linux/writeback.h>
-#include <linux/log2.h>
-#include <linux/hash.h>
#include <linux/blkdev.h>
#include <asm/unaligned.h>
#include "fat.h"
@@ -246,8 +237,7 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
return err;
}
-static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter,
+static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
loff_t offset)
{
struct file *file = iocb->ki_filp;
@@ -256,7 +246,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
ssize_t ret;
- if (rw == WRITE) {
+ if (iov_iter_rw(iter) == WRITE) {
/*
* FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
* so we need to update the ->mmu_private to block boundary.
@@ -275,8 +265,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
* FAT need to use the DIO_LOCKING for avoiding the race
* condition of fat_get_block() and ->truncate().
*/
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block);
- if (ret < 0 && (rw & WRITE))
+ ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block);
+ if (ret < 0 && iov_iter_rw(iter) == WRITE)
fat_write_failed(mapping, offset + count);
return ret;
@@ -1280,8 +1270,7 @@ out:
static int fat_read_root(struct inode *inode)
{
- struct super_block *sb = inode->i_sb;
- struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int error;
MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index d8da2d2e30ae..c4589e981760 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -6,10 +6,6 @@
* and date_dos2unix for date==0 by Igor Zhbanov(bsg@uniyar.ac.ru)
*/
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/time.h>
#include "fat.h"
/*
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index a783b0e1272a..cc6a8541b668 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -7,8 +7,6 @@
*/
#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/buffer_head.h>
#include "fat.h"
/* Characters that are undesirable in an MS-DOS file name */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b8b92c2f9683..7e0974eebd8e 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -16,10 +16,8 @@
*/
#include <linux/module.h>
-#include <linux/jiffies.h>
#include <linux/ctype.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include <linux/namei.h>
#include "fat.h"
diff --git a/fs/file.c b/fs/file.c
index ee738ea028fa..93c5f89c248b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -638,8 +638,7 @@ static struct file *__fget(unsigned int fd, fmode_t mask)
file = fcheck_files(files, fd);
if (file) {
/* File object ref couldn't be taken */
- if ((file->f_mode & mask) ||
- !atomic_long_inc_not_zero(&file->f_count))
+ if ((file->f_mode & mask) || !get_file_rcu(file))
file = NULL;
}
rcu_read_unlock();
diff --git a/fs/file_table.c b/fs/file_table.c
index 3f85411b03ce..294174dcc226 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -168,10 +168,10 @@ struct file *alloc_file(struct path *path, fmode_t mode,
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
if ((mode & FMODE_READ) &&
- likely(fop->read || fop->aio_read || fop->read_iter))
+ likely(fop->read || fop->read_iter))
mode |= FMODE_CAN_READ;
if ((mode & FMODE_WRITE) &&
- likely(fop->write || fop->aio_write || fop->write_iter))
+ likely(fop->write || fop->write_iter))
mode |= FMODE_CAN_WRITE;
file->f_mode = mode;
file->f_op = fop;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
struct completion *done; /* set if the caller waits */
};
+/*
+ * If an inode is constantly having its pages dirtied, but then the
+ * updates stop dirtytime_expire_interval seconds in the past, it's
+ * possible for the worst case time between when an inode has its
+ * timestamps updated and when they finally get written out to be two
+ * dirtytime_expire_intervals. We set the default to 12 hours (in
+ * seconds), which means most of the time inodes will have their
+ * timestamps written to disk after 12 hours, but in the worst case a
+ * few inodes might not their timestamps updated for 24 hours.
+ */
+unsigned int dirtytime_expire_interval = 12 * 60 * 60;
+
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
if ((flags & EXPIRE_DIRTY_ATIME) == 0)
older_than_this = work->older_than_this;
- else if ((work->reason == WB_REASON_SYNC) == 0) {
- expire_time = jiffies - (HZ * 86400);
+ else if (!work->for_sync) {
+ expire_time = jiffies - (dirtytime_expire_interval * HZ);
older_than_this = &expire_time;
}
while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
*/
redirty_tail(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
+ inode->dirtied_when = jiffies;
list_move(&inode->i_wb_list, &wb->b_dirty_time);
} else {
/* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
- if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
- (inode->i_state & I_DIRTY_TIME)) ||
- (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
- dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
- trace_writeback_lazytime(inode);
- }
+ if (inode->i_state & I_DIRTY_TIME) {
+ if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+ unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
+ unlikely(time_after(jiffies,
+ (inode->dirtied_time_when +
+ dirtytime_expire_interval * HZ)))) {
+ dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+ trace_writeback_lazytime(inode);
+ }
+ } else
+ inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
inode->i_state &= ~dirty;
/*
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
rcu_read_unlock();
}
+/*
+ * Wake up bdi's periodically to make sure dirtytime inodes gets
+ * written back periodically. We deliberately do *not* check the
+ * b_dirtytime list in wb_has_dirty_io(), since this would cause the
+ * kernel to be constantly waking up once there are any dirtytime
+ * inodes on the system. So instead we define a separate delayed work
+ * function which gets called much more rarely. (By default, only
+ * once every 12 hours.)
+ *
+ * If there is any other write activity going on in the file system,
+ * this function won't be necessary. But if the only thing that has
+ * happened on the file system is a dirtytime inode caused by an atime
+ * update, we need this infrastructure below to make sure that inode
+ * eventually gets pushed out to disk.
+ */
+static void wakeup_dirtytime_writeback(struct work_struct *w);
+static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
+
+static void wakeup_dirtytime_writeback(struct work_struct *w)
+{
+ struct backing_dev_info *bdi;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
+ if (list_empty(&bdi->wb.b_dirty_time))
+ continue;
+ bdi_wakeup_thread(bdi);
+ }
+ rcu_read_unlock();
+ schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+}
+
+static int __init start_dirtytime_writeback(void)
+{
+ schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+ return 0;
+}
+__initcall(start_dirtytime_writeback);
+
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret == 0 && write)
+ mod_delayed_work(system_wq, &dirtytime_work, 0);
+ return ret;
+}
+
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
inode->dirtied_when = jiffies;
- list_move(&inode->i_wb_list, dirtytime ?
- &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
+ if (dirtytime)
+ inode->dirtied_time_when = jiffies;
+ if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+ list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+ else
+ list_move(&inode->i_wb_list,
+ &bdi->wb.b_dirty_time);
spin_unlock(&bdi->wb.list_lock);
trace_writeback_dirty_inode_enqueue(inode);
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index b06c98796afb..611b5408f6ec 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -9,8 +9,8 @@ static DEFINE_SPINLOCK(pin_lock);
void pin_remove(struct fs_pin *pin)
{
spin_lock(&pin_lock);
- hlist_del(&pin->m_list);
- hlist_del(&pin->s_list);
+ hlist_del_init(&pin->m_list);
+ hlist_del_init(&pin->s_list);
spin_unlock(&pin_lock);
spin_lock_irq(&pin->wait.lock);
pin->done = 1;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 28d0c7abba1c..e5bbf748b698 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,7 +38,6 @@
#include <linux/device.h>
#include <linux/file.h>
#include <linux/fs.h>
-#include <linux/aio.h>
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/list.h>
@@ -48,6 +47,7 @@
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/module.h>
+#include <linux/uio.h>
#include "fuse_i.h"
@@ -88,32 +88,23 @@ static struct list_head *cuse_conntbl_head(dev_t devt)
* FUSE file.
*/
-static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
- loff_t *ppos)
+static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
{
+ struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
loff_t pos = 0;
- struct iovec iov = { .iov_base = buf, .iov_len = count };
- struct fuse_io_priv io = { .async = 0, .file = file };
- struct iov_iter ii;
- iov_iter_init(&ii, READ, &iov, 1, count);
- return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE);
+ return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE);
}
-static ssize_t cuse_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
+ struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
loff_t pos = 0;
- struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
- struct fuse_io_priv io = { .async = 0, .file = file };
- struct iov_iter ii;
- iov_iter_init(&ii, WRITE, &iov, 1, count);
-
/*
* No locking or generic_write_checks(), the server is
* responsible for locking and sanity checks.
*/
- return fuse_direct_io(&io, &ii, &pos,
+ return fuse_direct_io(&io, from, &pos,
FUSE_DIO_WRITE | FUSE_DIO_CUSE);
}
@@ -186,8 +177,8 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations cuse_frontend_fops = {
.owner = THIS_MODULE,
- .read = cuse_read,
- .write = cuse_write,
+ .read_iter = cuse_read_iter,
+ .write_iter = cuse_write_iter,
.open = cuse_open,
.release = cuse_release,
.unlocked_ioctl = cuse_file_ioctl,
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ed19a7d622fa..c8b68ab2e574 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,6 @@
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
-#include <linux/aio.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
@@ -711,28 +710,26 @@ struct fuse_copy_state {
struct fuse_conn *fc;
int write;
struct fuse_req *req;
- const struct iovec *iov;
+ struct iov_iter *iter;
struct pipe_buffer *pipebufs;
struct pipe_buffer *currbuf;
struct pipe_inode_info *pipe;
unsigned long nr_segs;
- unsigned long seglen;
- unsigned long addr;
struct page *pg;
unsigned len;
unsigned offset;
unsigned move_pages:1;
};
-static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
+static void fuse_copy_init(struct fuse_copy_state *cs,
+ struct fuse_conn *fc,
int write,
- const struct iovec *iov, unsigned long nr_segs)
+ struct iov_iter *iter)
{
memset(cs, 0, sizeof(*cs));
cs->fc = fc;
cs->write = write;
- cs->iov = iov;
- cs->nr_segs = nr_segs;
+ cs->iter = iter;
}
/* Unmap and put previous page of userspace buffer */
@@ -800,22 +797,16 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->nr_segs++;
}
} else {
- if (!cs->seglen) {
- BUG_ON(!cs->nr_segs);
- cs->seglen = cs->iov[0].iov_len;
- cs->addr = (unsigned long) cs->iov[0].iov_base;
- cs->iov++;
- cs->nr_segs--;
- }
- err = get_user_pages_fast(cs->addr, 1, cs->write, &page);
+ size_t off;
+ err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
if (err < 0)
return err;
- BUG_ON(err != 1);
+ BUG_ON(!err);
+ cs->len = err;
+ cs->offset = off;
cs->pg = page;
- cs->offset = cs->addr % PAGE_SIZE;
- cs->len = min(PAGE_SIZE - cs->offset, cs->seglen);
- cs->seglen -= cs->len;
- cs->addr += cs->len;
+ cs->offset = off;
+ iov_iter_advance(cs->iter, err);
}
return lock_request(cs->fc, cs->req);
@@ -890,8 +881,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
newpage = buf->page;
- if (WARN_ON(!PageUptodate(newpage)))
- return -EIO;
+ if (!PageUptodate(newpage))
+ SetPageUptodate(newpage);
ClearPageMappedToDisk(newpage);
@@ -1353,8 +1344,18 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
return err;
}
-static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static int fuse_dev_open(struct inode *inode, struct file *file)
+{
+ /*
+ * The fuse device's file's private_data is used to hold
+ * the fuse_conn(ection) when it is mounted, and is used to
+ * keep track of whether the file has been mounted already.
+ */
+ file->private_data = NULL;
+ return 0;
+}
+
+static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
{
struct fuse_copy_state cs;
struct file *file = iocb->ki_filp;
@@ -1362,9 +1363,12 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
if (!fc)
return -EPERM;
- fuse_copy_init(&cs, fc, 1, iov, nr_segs);
+ if (!iter_is_iovec(to))
+ return -EINVAL;
+
+ fuse_copy_init(&cs, fc, 1, to);
- return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
+ return fuse_dev_do_read(fc, file, &cs, iov_iter_count(to));
}
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
@@ -1384,7 +1388,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
if (!bufs)
return -ENOMEM;
- fuse_copy_init(&cs, fc, 1, NULL, 0);
+ fuse_copy_init(&cs, fc, 1, NULL);
cs.pipebufs = bufs;
cs.pipe = pipe;
ret = fuse_dev_do_read(fc, in, &cs, len);
@@ -1797,6 +1801,9 @@ copy_finish:
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
+ /* Don't try to move pages (yet) */
+ cs->move_pages = 0;
+
switch (code) {
case FUSE_NOTIFY_POLL:
return fuse_notify_poll(fc, size, cs);
@@ -1957,17 +1964,19 @@ static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
return err;
}
-static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
{
struct fuse_copy_state cs;
struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
if (!fc)
return -EPERM;
- fuse_copy_init(&cs, fc, 0, iov, nr_segs);
+ if (!iter_is_iovec(from))
+ return -EINVAL;
+
+ fuse_copy_init(&cs, fc, 0, from);
- return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
+ return fuse_dev_do_write(fc, &cs, iov_iter_count(from));
}
static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
@@ -2030,8 +2039,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
}
pipe_unlock(pipe);
- fuse_copy_init(&cs, fc, 0, NULL, nbuf);
+ fuse_copy_init(&cs, fc, 0, NULL);
cs.pipebufs = bufs;
+ cs.nr_segs = nbuf;
cs.pipe = pipe;
if (flags & SPLICE_F_MOVE)
@@ -2217,12 +2227,11 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
const struct file_operations fuse_dev_operations = {
.owner = THIS_MODULE,
+ .open = fuse_dev_open,
.llseek = no_llseek,
- .read = do_sync_read,
- .aio_read = fuse_dev_read,
+ .read_iter = fuse_dev_read,
.splice_read = fuse_dev_splice_read,
- .write = do_sync_write,
- .aio_write = fuse_dev_write,
+ .write_iter = fuse_dev_write,
.splice_write = fuse_dev_splice_write,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c01ec3bdcfd8..5ef05b5c4cff 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,8 +15,8 @@
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
-#include <linux/aio.h>
#include <linux/falloc.h>
+#include <linux/uio.h>
static const struct file_operations fuse_direct_io_file_operations;
@@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
}
}
+static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
+{
+ if (io->err)
+ return io->err;
+
+ if (io->bytes >= 0 && io->write)
+ return -EIO;
+
+ return io->bytes < 0 ? io->size : io->bytes;
+}
+
/**
* In case of short read, the caller sets 'pos' to the position of
* actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
*/
static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
{
+ bool is_sync = is_sync_kiocb(io->iocb);
int left;
spin_lock(&io->lock);
@@ -555,30 +567,24 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
io->bytes = pos;
left = --io->reqs;
+ if (!left && is_sync)
+ complete(io->done);
spin_unlock(&io->lock);
- if (!left) {
- long res;
+ if (!left && !is_sync) {
+ ssize_t res = fuse_get_res_by_io(io);
- if (io->err)
- res = io->err;
- else if (io->bytes >= 0 && io->write)
- res = -EIO;
- else {
- res = io->bytes < 0 ? io->size : io->bytes;
-
- if (!is_sync_kiocb(io->iocb)) {
- struct inode *inode = file_inode(io->iocb->ki_filp);
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
+ if (res >= 0) {
+ struct inode *inode = file_inode(io->iocb->ki_filp);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
- spin_lock(&fc->lock);
- fi->attr_version = ++fc->attr_version;
- spin_unlock(&fc->lock);
- }
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ spin_unlock(&fc->lock);
}
- aio_complete(io->iocb, res, 0);
+ io->iocb->ki_complete(io->iocb, res, 0);
kfree(io);
}
}
@@ -1139,13 +1145,11 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
- size_t count = iov_iter_count(from);
ssize_t written = 0;
ssize_t written_buffered = 0;
struct inode *inode = mapping->host;
ssize_t err;
loff_t endbyte = 0;
- loff_t pos = iocb->ki_pos;
if (get_fuse_conn(inode)->writeback_cache) {
/* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1161,14 +1165,10 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err)
+ err = generic_write_checks(iocb, from);
+ if (err <= 0)
goto out;
- if (count == 0)
- goto out;
-
- iov_iter_truncate(from, count);
err = file_remove_suid(file);
if (err)
goto out;
@@ -1177,7 +1177,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err)
goto out;
- if (file->f_flags & O_DIRECT) {
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ loff_t pos = iocb->ki_pos;
written = generic_file_direct_write(iocb, from, pos);
if (written < 0 || !iov_iter_count(from))
goto out;
@@ -1203,9 +1204,9 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
written += written_buffered;
iocb->ki_pos = pos + written_buffered;
} else {
- written = fuse_perform_write(file, mapping, from, pos);
+ written = fuse_perform_write(file, mapping, from, iocb->ki_pos);
if (written >= 0)
- iocb->ki_pos = pos + written;
+ iocb->ki_pos += written;
}
out:
current->backing_dev_info = NULL;
@@ -1395,55 +1396,30 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
return res;
}
-static ssize_t fuse_direct_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct fuse_io_priv io = { .async = 0, .file = file };
- struct iovec iov = { .iov_base = buf, .iov_len = count };
- struct iov_iter ii;
- iov_iter_init(&ii, READ, &iov, 1, count);
- return __fuse_direct_read(&io, &ii, ppos);
-}
-
-static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
- struct iov_iter *iter,
- loff_t *ppos)
+static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- struct file *file = io->file;
- struct inode *inode = file_inode(file);
- size_t count = iov_iter_count(iter);
- ssize_t res;
-
-
- res = generic_write_checks(file, ppos, &count, 0);
- if (!res) {
- iov_iter_truncate(iter, count);
- res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
- }
-
- fuse_invalidate_attr(inode);
-
- return res;
+ struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp };
+ return __fuse_direct_read(&io, to, &iocb->ki_pos);
}
-static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
+ struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- ssize_t res;
struct fuse_io_priv io = { .async = 0, .file = file };
- struct iov_iter ii;
- iov_iter_init(&ii, WRITE, &iov, 1, count);
+ ssize_t res;
if (is_bad_inode(inode))
return -EIO;
/* Don't allow parallel writes to the same file */
mutex_lock(&inode->i_mutex);
- res = __fuse_direct_write(&io, &ii, ppos);
+ res = generic_write_checks(iocb, from);
if (res > 0)
- fuse_write_update_size(inode, *ppos);
+ res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
+ fuse_invalidate_attr(inode);
+ if (res > 0)
+ fuse_write_update_size(inode, iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
return res;
@@ -2798,9 +2774,9 @@ static inline loff_t fuse_round_up(loff_t off)
}
static ssize_t
-fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
+fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
+ DECLARE_COMPLETION_ONSTACK(wait);
ssize_t ret = 0;
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
@@ -2815,15 +2791,15 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
inode = file->f_mapping->host;
i_size = i_size_read(inode);
- if ((rw == READ) && (offset > i_size))
+ if ((iov_iter_rw(iter) == READ) && (offset > i_size))
return 0;
/* optimization for short read */
- if (async_dio && rw != WRITE && offset + count > i_size) {
+ if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) {
if (offset >= i_size)
return 0;
- count = min_t(loff_t, count, fuse_round_up(i_size - offset));
- iov_iter_truncate(iter, count);
+ iov_iter_truncate(iter, fuse_round_up(i_size - offset));
+ count = iov_iter_count(iter);
}
io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2834,7 +2810,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
io->bytes = -1;
io->size = 0;
io->offset = offset;
- io->write = (rw == WRITE);
+ io->write = (iov_iter_rw(iter) == WRITE);
io->err = 0;
io->file = file;
/*
@@ -2849,13 +2825,19 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
* to wait on real async I/O requests, so we must submit this request
* synchronously.
*/
- if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
+ if (!is_sync_kiocb(iocb) && (offset + count > i_size) &&
+ iov_iter_rw(iter) == WRITE)
io->async = false;
- if (rw == WRITE)
- ret = __fuse_direct_write(io, iter, &pos);
- else
+ if (io->async && is_sync_kiocb(iocb))
+ io->done = &wait;
+
+ if (iov_iter_rw(iter) == WRITE) {
+ ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE);
+ fuse_invalidate_attr(inode);
+ } else {
ret = __fuse_direct_read(io, iter, &pos);
+ }
if (io->async) {
fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
@@ -2864,12 +2846,13 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
if (!is_sync_kiocb(iocb))
return -EIOCBQUEUED;
- ret = wait_on_sync_kiocb(iocb);
- } else {
- kfree(io);
+ wait_for_completion(&wait);
+ ret = fuse_get_res_by_io(io);
}
- if (rw == WRITE) {
+ kfree(io);
+
+ if (iov_iter_rw(iter) == WRITE) {
if (ret > 0)
fuse_write_update_size(inode, pos);
else if (ret < 0 && offset + count > i_size)
@@ -2957,9 +2940,7 @@ out:
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
- .read = new_sync_read,
.read_iter = fuse_file_read_iter,
- .write = new_sync_write,
.write_iter = fuse_file_write_iter,
.mmap = fuse_file_mmap,
.open = fuse_open,
@@ -2977,8 +2958,8 @@ static const struct file_operations fuse_file_operations = {
static const struct file_operations fuse_direct_io_file_operations = {
.llseek = fuse_file_llseek,
- .read = fuse_direct_read,
- .write = fuse_direct_write,
+ .read_iter = fuse_direct_read_iter,
+ .write_iter = fuse_direct_write_iter,
.mmap = fuse_direct_mmap,
.open = fuse_open,
.flush = fuse_flush,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1cdfb07c1376..7354dc142a50 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -263,6 +263,7 @@ struct fuse_io_priv {
int err;
struct kiocb *iocb;
struct file *file;
+ struct completion *done;
};
/**
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7b3143064af1..1be3b061c05c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -110,11 +110,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
if (error)
goto out;
-
- if (acl)
- set_cached_acl(inode, type, acl);
- else
- forget_cached_acl(inode, type);
+ set_cached_acl(inode, type, acl);
out:
kfree(data);
return error;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4ad4f94edebe..5551fea0afd7 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,7 +20,7 @@
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include <trace/events/writeback.h>
#include "gfs2.h"
@@ -671,12 +671,12 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
if (alloc_required) {
struct gfs2_alloc_parms ap = { .aflags = 0, };
- error = gfs2_quota_lock_check(ip);
+ requested = data_blocks + ind_blocks;
+ ap.target = requested;
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
goto out_unlock;
- requested = data_blocks + ind_blocks;
- ap.target = requested;
error = gfs2_inplace_reserve(ip, &ap);
if (error)
goto out_qunlock;
@@ -1016,13 +1016,12 @@ out:
/**
* gfs2_ok_for_dio - check that dio is valid on this file
* @ip: The inode
- * @rw: READ or WRITE
* @offset: The offset at which we are reading or writing
*
* Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
* 1 (to accept the i/o request)
*/
-static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
+static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
{
/*
* Should we return an error here? I can't see that O_DIRECT for
@@ -1039,8 +1038,8 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
-static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -1061,7 +1060,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
rv = gfs2_glock_nq(&gh);
if (rv)
return rv;
- rv = gfs2_ok_for_dio(ip, rw, offset);
+ rv = gfs2_ok_for_dio(ip, offset);
if (rv != 1)
goto out; /* dio not valid, fall back to buffered i/o */
@@ -1091,13 +1090,12 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
rv = filemap_write_and_wait_range(mapping, lstart, end);
if (rv)
goto out;
- if (rw == WRITE)
+ if (iov_iter_rw(iter) == WRITE)
truncate_inode_pages_range(mapping, lstart, end);
}
- rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
- iter, offset,
- gfs2_get_block_direct, NULL, NULL, 0);
+ rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+ offset, gfs2_get_block_direct, NULL, NULL, 0);
out:
gfs2_glock_dq(&gh);
gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index f0b945ab853e..61296ecbd0e2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1224,7 +1224,7 @@ static int do_grow(struct inode *inode, u64 size)
if (gfs2_is_stuffed(ip) &&
(size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3e32bb8e2d7e..31892871ea87 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,7 +25,6 @@
#include <asm/uaccess.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
-#include <linux/aio.h>
#include <linux/delay.h>
#include "gfs2.h"
@@ -429,11 +428,11 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out_unlock;
- ret = gfs2_quota_lock_check(ip);
- if (ret)
- goto out_unlock;
gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
ap.target = data_blocks + ind_blocks;
+ ret = gfs2_quota_lock_check(ip, &ap);
+ if (ret)
+ goto out_unlock;
ret = gfs2_inplace_reserve(ip, &ap);
if (ret)
goto out_quota_unlock;
@@ -710,7 +709,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
- if (file->f_flags & O_APPEND) {
+ if (iocb->ki_flags & IOCB_APPEND) {
struct gfs2_holder gh;
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -765,22 +764,30 @@ out:
brelse(dibh);
return error;
}
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
- unsigned int *data_blocks, unsigned int *ind_blocks)
+/**
+ * calc_max_reserv() - Reverse of write_calc_reserv. Given a number of
+ * blocks, determine how many bytes can be written.
+ * @ip: The inode in question.
+ * @len: Max cap of bytes. What we return in *len must be <= this.
+ * @data_blocks: Compute and return the number of data blocks needed
+ * @ind_blocks: Compute and return the number of indirect blocks needed
+ * @max_blocks: The total blocks available to work with.
+ *
+ * Returns: void, but @len, @data_blocks and @ind_blocks are filled in.
+ */
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t *len,
+ unsigned int *data_blocks, unsigned int *ind_blocks,
+ unsigned int max_blocks)
{
+ loff_t max = *len;
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int max_blocks = ip->i_rgd->rd_free_clone;
unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
for (tmp = max_data; tmp > sdp->sd_diptrs;) {
tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
max_data -= tmp;
}
- /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
- so it might end up with fewer data blocks */
- if (max_data <= *data_blocks)
- return;
+
*data_blocks = max_data;
*ind_blocks = max_blocks - max_data;
*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
@@ -797,7 +804,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
- loff_t bytes, max_bytes;
+ loff_t bytes, max_bytes, max_blks = UINT_MAX;
int error;
const loff_t pos = offset;
const loff_t count = len;
@@ -819,6 +826,9 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
gfs2_size_hint(file, offset, len);
+ gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
+ ap.min_target = data_blocks + ind_blocks;
+
while (len > 0) {
if (len < bytes)
bytes = len;
@@ -827,27 +837,41 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
offset += bytes;
continue;
}
- error = gfs2_quota_lock_check(ip);
+
+ /* We need to determine how many bytes we can actually
+ * fallocate without exceeding quota or going over the
+ * end of the fs. We start off optimistically by assuming
+ * we can write max_bytes */
+ max_bytes = (len > max_chunk_size) ? max_chunk_size : len;
+
+ /* Since max_bytes is most likely a theoretical max, we
+ * calculate a more realistic 'bytes' to serve as a good
+ * starting point for the number of bytes we may be able
+ * to write */
+ gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+ ap.target = data_blocks + ind_blocks;
+
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
-retry:
- gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+ /* ap.allowed tells us how many blocks quota will allow
+ * us to write. Check if this reduces max_blks */
+ if (ap.allowed && ap.allowed < max_blks)
+ max_blks = ap.allowed;
- ap.target = data_blocks + ind_blocks;
error = gfs2_inplace_reserve(ip, &ap);
- if (error) {
- if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
- bytes >>= 1;
- bytes &= bsize_mask;
- if (bytes == 0)
- bytes = sdp->sd_sb.sb_bsize;
- goto retry;
- }
+ if (error)
goto out_qunlock;
- }
- max_bytes = bytes;
- calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
- &max_bytes, &data_blocks, &ind_blocks);
+
+ /* check if the selected rgrp limits our max_blks further */
+ if (ap.allowed && ap.allowed < max_blks)
+ max_blks = ap.allowed;
+
+ /* Almost done. Calculate bytes that can be written using
+ * max_blks. We also recompute max_bytes, data_blocks and
+ * ind_blocks */
+ calc_max_reserv(ip, &max_bytes, &data_blocks,
+ &ind_blocks, max_blks);
rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks);
@@ -931,6 +955,22 @@ out_uninit:
return ret;
}
+static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags)
+{
+ int error;
+ struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
+
+ error = gfs2_rs_alloc(ip);
+ if (error)
+ return (ssize_t)error;
+
+ gfs2_size_hint(out, *ppos, len);
+
+ return iter_file_splice_write(pipe, out, ppos, len, flags);
+}
+
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
/**
@@ -1065,9 +1105,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
const struct file_operations gfs2_file_fops = {
.llseek = gfs2_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = gfs2_file_write_iter,
.unlocked_ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
@@ -1077,7 +1115,7 @@ const struct file_operations gfs2_file_fops = {
.lock = gfs2_lock,
.flock = gfs2_flock,
.splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
};
@@ -1097,9 +1135,7 @@ const struct file_operations gfs2_dir_fops = {
const struct file_operations gfs2_file_fops_nolock = {
.llseek = gfs2_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = gfs2_file_write_iter,
.unlocked_ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
@@ -1107,7 +1143,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.release = gfs2_release,
.fsync = gfs2_fsync,
.splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
};
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f42dffba056a..0fa8062f85a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2047,34 +2047,41 @@ static const struct file_operations gfs2_sbstats_fops = {
int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
- sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
- if (!sdp->debugfs_dir)
- return -ENOMEM;
- sdp->debugfs_dentry_glocks = debugfs_create_file("glocks",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_glocks_fops);
- if (!sdp->debugfs_dentry_glocks)
+ struct dentry *dent;
+
+ dent = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
+ if (IS_ERR_OR_NULL(dent))
+ goto fail;
+ sdp->debugfs_dir = dent;
+
+ dent = debugfs_create_file("glocks",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_glocks_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_glocks = dent;
- sdp->debugfs_dentry_glstats = debugfs_create_file("glstats",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_glstats_fops);
- if (!sdp->debugfs_dentry_glstats)
+ dent = debugfs_create_file("glstats",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_glstats_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_glstats = dent;
- sdp->debugfs_dentry_sbstats = debugfs_create_file("sbstats",
- S_IFREG | S_IRUGO,
- sdp->debugfs_dir, sdp,
- &gfs2_sbstats_fops);
- if (!sdp->debugfs_dentry_sbstats)
+ dent = debugfs_create_file("sbstats",
+ S_IFREG | S_IRUGO,
+ sdp->debugfs_dir, sdp,
+ &gfs2_sbstats_fops);
+ if (IS_ERR_OR_NULL(dent))
goto fail;
+ sdp->debugfs_dentry_sbstats = dent;
return 0;
fail:
gfs2_delete_debugfs_file(sdp);
- return -ENOMEM;
+ return dent ? PTR_ERR(dent) : -ENOMEM;
}
void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
@@ -2100,6 +2107,8 @@ void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
int gfs2_register_debugfs(void)
{
gfs2_root = debugfs_create_dir("gfs2", NULL);
+ if (IS_ERR(gfs2_root))
+ return PTR_ERR(gfs2_root);
return gfs2_root ? 0 : -ENOMEM;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7a2dbbc0d634..58b75abf6ab2 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -301,8 +301,10 @@ struct gfs2_blkreserv {
* to the allocation code.
*/
struct gfs2_alloc_parms {
- u32 target;
+ u64 target;
+ u32 min_target;
u32 aflags;
+ u64 allowed;
};
enum {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 73c72253faac..08bc84d7e768 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -382,7 +382,7 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, };
int error;
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
goto out;
@@ -525,7 +525,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
int error;
if (da->nr_blocks) {
- error = gfs2_quota_lock_check(dip);
+ error = gfs2_quota_lock_check(dip, &ap);
if (error)
goto fail_quota_locks;
@@ -953,7 +953,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (da.nr_blocks) {
struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
- error = gfs2_quota_lock_check(dip);
+ error = gfs2_quota_lock_check(dip, &ap);
if (error)
goto out_gunlock;
@@ -1470,7 +1470,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (da.nr_blocks) {
struct gfs2_alloc_parms ap = { .target = da.nr_blocks, };
- error = gfs2_quota_lock_check(ndip);
+ error = gfs2_quota_lock_check(ndip, &ap);
if (error)
goto out_gunlock;
@@ -1669,6 +1669,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
kuid_t ouid, nuid;
kgid_t ogid, ngid;
int error;
+ struct gfs2_alloc_parms ap;
ouid = inode->i_uid;
ogid = inode->i_gid;
@@ -1696,9 +1697,11 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (error)
goto out;
+ ap.target = gfs2_get_inode_blocks(&ip->i_inode);
+
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
- error = gfs2_quota_check(ip, nuid, ngid);
+ error = gfs2_quota_check(ip, nuid, ngid, &ap);
if (error)
goto out_gunlock_q;
}
@@ -1713,9 +1716,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
- u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
- gfs2_quota_change(ip, -blocks, ouid, ogid);
- gfs2_quota_change(ip, blocks, nuid, ngid);
+ gfs2_quota_change(ip, -ap.target, ouid, ogid);
+ gfs2_quota_change(ip, ap.target, nuid, ngid);
}
out_end_trans:
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 3aa17d4d1cfc..e3065cb9ab08 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -923,6 +923,9 @@ restart:
if (error)
return error;
+ if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
+ force_refresh = FORCE;
+
qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr;
if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) {
@@ -974,11 +977,8 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
sizeof(struct gfs2_quota_data *), sort_qd, NULL);
for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
- int force = NO_FORCE;
qd = ip->i_res->rs_qa_qd[x];
- if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
- force = FORCE;
- error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
+ error = do_glock(qd, NO_FORCE, &ip->i_res->rs_qa_qd_ghs[x]);
if (error)
break;
}
@@ -1094,14 +1094,33 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
return 0;
}
-int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
+/**
+ * gfs2_quota_check - check if allocating new blocks will exceed quota
+ * @ip: The inode for which this check is being performed
+ * @uid: The uid to check against
+ * @gid: The gid to check against
+ * @ap: The allocation parameters. ap->target contains the requested
+ * blocks. ap->min_target, if set, contains the minimum blks
+ * requested.
+ *
+ * Returns: 0 on success.
+ * min_req = ap->min_target ? ap->min_target : ap->target;
+ * quota must allow atleast min_req blks for success and
+ * ap->allowed is set to the number of blocks allowed
+ *
+ * -EDQUOT otherwise, quota violation. ap->allowed is set to number
+ * of blocks available.
+ */
+int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+ struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qd;
- s64 value;
+ s64 value, warn, limit;
unsigned int x;
int error = 0;
+ ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
if (!test_bit(GIF_QD_LOCKED, &ip->i_flags))
return 0;
@@ -1115,30 +1134,37 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
qid_eq(qd->qd_id, make_kqid_gid(gid))))
continue;
+ warn = (s64)be64_to_cpu(qd->qd_qb.qb_warn);
+ limit = (s64)be64_to_cpu(qd->qd_qb.qb_limit);
value = (s64)be64_to_cpu(qd->qd_qb.qb_value);
spin_lock(&qd_lock);
value += qd->qd_change;
spin_unlock(&qd_lock);
- if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) {
- print_message(qd, "exceeded");
- quota_send_warning(qd->qd_id,
- sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN);
-
- error = -EDQUOT;
- break;
- } else if (be64_to_cpu(qd->qd_qb.qb_warn) &&
- (s64)be64_to_cpu(qd->qd_qb.qb_warn) < value &&
+ if (limit > 0 && (limit - value) < ap->allowed)
+ ap->allowed = limit - value;
+ /* If we can't meet the target */
+ if (limit && limit < (value + (s64)ap->target)) {
+ /* If no min_target specified or we don't meet
+ * min_target, return -EDQUOT */
+ if (!ap->min_target || ap->min_target > ap->allowed) {
+ print_message(qd, "exceeded");
+ quota_send_warning(qd->qd_id,
+ sdp->sd_vfs->s_dev,
+ QUOTA_NL_BHARDWARN);
+ error = -EDQUOT;
+ break;
+ }
+ } else if (warn && warn < value &&
time_after_eq(jiffies, qd->qd_last_warn +
- gfs2_tune_get(sdp,
- gt_quota_warn_period) * HZ)) {
+ gfs2_tune_get(sdp, gt_quota_warn_period)
+ * HZ)) {
quota_send_warning(qd->qd_id,
sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN);
error = print_message(qd, "warning");
qd->qd_last_warn = jiffies;
}
}
-
return error;
}
@@ -1468,32 +1494,34 @@ int gfs2_quotad(void *data)
return 0;
}
-static int gfs2_quota_get_xstate(struct super_block *sb,
- struct fs_quota_stat *fqs)
+static int gfs2_quota_get_state(struct super_block *sb, struct qc_state *state)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
- memset(fqs, 0, sizeof(struct fs_quota_stat));
- fqs->qs_version = FS_QSTAT_VERSION;
+ memset(state, 0, sizeof(*state));
switch (sdp->sd_args.ar_quota) {
case GFS2_QUOTA_ON:
- fqs->qs_flags |= (FS_QUOTA_UDQ_ENFD | FS_QUOTA_GDQ_ENFD);
+ state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
+ state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
/*FALLTHRU*/
case GFS2_QUOTA_ACCOUNT:
- fqs->qs_flags |= (FS_QUOTA_UDQ_ACCT | FS_QUOTA_GDQ_ACCT);
+ state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED |
+ QCI_SYSFILE;
+ state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED |
+ QCI_SYSFILE;
break;
case GFS2_QUOTA_OFF:
break;
}
-
if (sdp->sd_quota_inode) {
- fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr;
- fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks;
+ state->s_state[USRQUOTA].ino =
+ GFS2_I(sdp->sd_quota_inode)->i_no_addr;
+ state->s_state[USRQUOTA].blocks = sdp->sd_quota_inode->i_blocks;
}
- fqs->qs_uquota.qfs_nextents = 1; /* unsupported */
- fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */
- fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru);
+ state->s_state[USRQUOTA].nextents = 1; /* unsupported */
+ state->s_state[GRPQUOTA] = state->s_state[USRQUOTA];
+ state->s_incoredqs = list_lru_count(&gfs2_qd_lru);
return 0;
}
@@ -1638,7 +1666,7 @@ out_put:
const struct quotactl_ops gfs2_quotactl_ops = {
.quota_sync = gfs2_quota_sync,
- .get_xstate = gfs2_quota_get_xstate,
+ .get_state = gfs2_quota_get_state,
.get_dqblk = gfs2_get_dqblk,
.set_dqblk = gfs2_set_dqblk,
};
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 55d506eb3c4a..ad04b3acae2b 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -24,7 +24,8 @@ extern void gfs2_quota_unhold(struct gfs2_inode *ip);
extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
extern void gfs2_quota_unlock(struct gfs2_inode *ip);
-extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
+extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid,
+ struct gfs2_alloc_parms *ap);
extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
kuid_t uid, kgid_t gid);
@@ -37,7 +38,8 @@ extern int gfs2_quotad(void *data);
extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp);
-static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
+static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
+ struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
int ret;
@@ -48,7 +50,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
return ret;
if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
return 0;
- ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
+ ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid, ap);
if (ret)
gfs2_quota_unlock(ip);
return ret;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9150207f365c..6af2396a317c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1946,10 +1946,18 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
* @ip: the inode to reserve space for
* @ap: the allocation parameters
*
- * Returns: errno
+ * We try our best to find an rgrp that has at least ap->target blocks
+ * available. After a couple of passes (loops == 2), the prospects of finding
+ * such an rgrp diminish. At this stage, we return the first rgrp that has
+ * atleast ap->min_target blocks available. Either way, we set ap->allowed to
+ * the number of blocks available in the chosen rgrp.
+ *
+ * Returns: 0 on success,
+ * -ENOMEM if a suitable rgrp can't be found
+ * errno otherwise
*/
-int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
@@ -2012,7 +2020,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
/* Skip unuseable resource groups */
if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
GFS2_RDF_ERROR)) ||
- (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
+ (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb)
@@ -2027,11 +2035,13 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
goto check_rgrp;
/* If rgrp has enough free space, use it */
- if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
+ if (rs->rs_rbm.rgd->rd_free_clone >= ap->target ||
+ (loops == 2 && ap->min_target &&
+ rs->rs_rbm.rgd->rd_free_clone >= ap->min_target)) {
ip->i_rgd = rs->rs_rbm.rgd;
+ ap->allowed = ip->i_rgd->rd_free_clone;
return 0;
}
-
check_rgrp:
/* Check for unlinked inodes which can be reclaimed */
if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b104f4af3afd..68972ecfbb01 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -41,7 +41,8 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
#define GFS2_AF_ORLOV 1
-extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap);
+extern int gfs2_inplace_reserve(struct gfs2_inode *ip,
+ struct gfs2_alloc_parms *ap);
extern void gfs2_inplace_release(struct gfs2_inode *ip);
extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 0b81f783f787..fd260ce8869a 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -732,7 +732,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
return error;
- error = gfs2_quota_lock_check(ip);
+ error = gfs2_quota_lock_check(ip, &ap);
if (error)
return error;
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 145566851e7a..36d1a6ae7655 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -197,7 +197,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode = hfs_new_inode(dir, &dentry->d_name, mode);
if (!inode)
- return -ENOSPC;
+ return -ENOMEM;
res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
if (res) {
@@ -226,7 +226,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode = hfs_new_inode(dir, &dentry->d_name, S_IFDIR | mode);
if (!inode)
- return -ENOSPC;
+ return -ENOMEM;
res = hfs_cat_create(inode->i_ino, dir, &dentry->d_name, inode);
if (res) {
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d0929bc81782..75fd5d873c19 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,7 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "hfs_fs.h"
#include "btree.h"
@@ -124,8 +124,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
return res ? try_to_free_buffers(page) : 0;
}
-static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -133,13 +133,13 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block);
+ ret = blockdev_direct_IO(iocb, inode, iter, offset, hfs_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
*/
- if (unlikely((rw & WRITE) && ret < 0)) {
+ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
loff_t isize = i_size_read(inode);
loff_t end = offset + count;
@@ -674,9 +674,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
static const struct file_operations hfs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index c1422d91cd36..528e38b5af7f 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -118,9 +118,7 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd,
int b, e;
int res;
- if (!rec_found)
- BUG();
-
+ BUG_ON(!rec_found);
b = 0;
e = bnode->num_recs - 1;
res = -ENOENT;
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 6e560d56094b..754fdf8c6356 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -131,13 +131,16 @@ skip:
hfs_bnode_write(node, entry, data_off + key_len, entry_len);
hfs_bnode_dump(node);
- if (new_node) {
- /* update parent key if we inserted a key
- * at the start of the first node
- */
- if (!rec && new_node != node)
- hfs_brec_update_parent(fd);
+ /*
+ * update parent key if we inserted a key
+ * at the start of the node and it is not the new node
+ */
+ if (!rec && new_node != node) {
+ hfs_bnode_read_key(node, fd->search_key, data_off + size);
+ hfs_brec_update_parent(fd);
+ }
+ if (new_node) {
hfs_bnode_put(fd->bnode);
if (!new_node->parent) {
hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
goto again;
}
- if (!rec)
- hfs_brec_update_parent(fd);
-
return 0;
}
@@ -370,6 +370,8 @@ again:
if (IS_ERR(parent))
return PTR_ERR(parent);
__hfs_brec_find(parent, fd, hfs_find_rec_by_key);
+ if (fd->record < 0)
+ return -ENOENT;
hfs_bnode_dump(parent);
rec = fd->record;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 7892e6fddb66..022974ab6e3c 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -350,10 +350,11 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
&fd.search_key->cat.name.unicode,
off + 2, len);
fd.search_key->key_len = cpu_to_be16(6 + len);
- } else
+ } else {
err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
if (unlikely(err))
goto out;
+ }
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index f0235c1640af..3074609befc3 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -434,7 +434,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
struct inode *inode;
- int res = -ENOSPC;
+ int res = -ENOMEM;
mutex_lock(&sbi->vh_mutex);
inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO);
@@ -476,7 +476,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
struct inode *inode;
- int res = -ENOSPC;
+ int res = -ENOMEM;
mutex_lock(&sbi->vh_mutex);
inode = hfsplus_new_inode(dir->i_sb, mode);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 0cf786f2d046..b0afedbef12b 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,7 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
@@ -122,8 +122,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
return res ? try_to_free_buffers(page) : 0;
}
-static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -131,14 +131,13 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
- hfsplus_get_block);
+ ret = blockdev_direct_IO(iocb, inode, iter, offset, hfsplus_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
*/
- if (unlikely((rw & WRITE) && ret < 0)) {
+ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
loff_t isize = i_size_read(inode);
loff_t end = offset + count;
@@ -254,6 +253,12 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
inode_dio_wait(inode);
+ if (attr->ia_size > inode->i_size) {
+ error = generic_cont_expand_simple(inode,
+ attr->ia_size);
+ if (error)
+ return error;
+ }
truncate_setsize(inode, attr->ia_size);
hfsplus_file_truncate(inode);
}
@@ -341,9 +346,7 @@ static const struct inode_operations hfsplus_file_inode_operations = {
static const struct file_operations hfsplus_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.splice_read = generic_file_splice_read,
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index d3ff5cc317d7..8e98f5db6ad6 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -76,7 +76,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
{
struct inode *inode = file_inode(file);
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
- unsigned int flags;
+ unsigned int flags, new_fl = 0;
int err = 0;
err = mnt_want_write_file(file);
@@ -110,14 +110,12 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
}
if (flags & FS_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & FS_APPEND_FL)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
+ new_fl |= S_APPEND;
+
+ inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND);
if (flags & FS_NODUMP_FL)
hip->userflags |= HFSPLUS_FLG_NODUMP;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index d98094a9f476..89f262d8fcd8 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -44,7 +44,7 @@ static int strcmp_xattr_acl(const char *name)
return -1;
}
-static inline int is_known_namespace(const char *name)
+static bool is_known_namespace(const char *name)
{
if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
@@ -424,6 +424,28 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len)
return len;
}
+int hfsplus_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags,
+ const char *prefix, size_t prefixlen)
+{
+ char *xattr_name;
+ int res;
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
+ strcpy(xattr_name, prefix);
+ strcpy(xattr_name + prefixlen, name);
+ res = __hfsplus_setxattr(dentry->d_inode, xattr_name, value, size,
+ flags);
+ kfree(xattr_name);
+ return res;
+}
+
static ssize_t hfsplus_getxattr_finder_info(struct inode *inode,
void *value, size_t size)
{
@@ -560,6 +582,30 @@ failed_getxattr_init:
return res;
}
+ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
+ void *value, size_t size,
+ const char *prefix, size_t prefixlen)
+{
+ int res;
+ char *xattr_name;
+
+ if (!strcmp(name, ""))
+ return -EINVAL;
+
+ xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
+ GFP_KERNEL);
+ if (!xattr_name)
+ return -ENOMEM;
+
+ strcpy(xattr_name, prefix);
+ strcpy(xattr_name + prefixlen, name);
+
+ res = __hfsplus_getxattr(dentry->d_inode, xattr_name, value, size);
+ kfree(xattr_name);
+ return res;
+
+}
+
static inline int can_list(const char *xattr_name)
{
if (!xattr_name)
@@ -806,9 +852,6 @@ end_removexattr:
static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
- char *xattr_name;
- int res;
-
if (!strcmp(name, ""))
return -EINVAL;
@@ -818,24 +861,19 @@ static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
*/
if (is_known_namespace(name))
return -EOPNOTSUPP;
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN
- + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
- if (!xattr_name)
- return -ENOMEM;
- strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
- strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
- res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
- kfree(xattr_name);
- return res;
+ /*
+ * osx is the namespace we use to indicate an unprefixed
+ * attribute on the filesystem (like the ones that OS X
+ * creates), so we pass the name through unmodified (after
+ * ensuring it doesn't conflict with another namespace).
+ */
+ return __hfsplus_getxattr(dentry->d_inode, name, buffer, size);
}
static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags, int type)
{
- char *xattr_name;
- int res;
-
if (!strcmp(name, ""))
return -EINVAL;
@@ -845,16 +883,14 @@ static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
*/
if (is_known_namespace(name))
return -EOPNOTSUPP;
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN
- + XATTR_MAC_OSX_PREFIX_LEN + 1, GFP_KERNEL);
- if (!xattr_name)
- return -ENOMEM;
- strcpy(xattr_name, XATTR_MAC_OSX_PREFIX);
- strcpy(xattr_name + XATTR_MAC_OSX_PREFIX_LEN, name);
- res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
- kfree(xattr_name);
- return res;
+ /*
+ * osx is the namespace we use to indicate an unprefixed
+ * attribute on the filesystem (like the ones that OS X
+ * creates), so we pass the name through unmodified (after
+ * ensuring it doesn't conflict with another namespace).
+ */
+ return __hfsplus_setxattr(dentry->d_inode, name, buffer, size, flags);
}
static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
index 288530cf80b5..f9b0955b3d28 100644
--- a/fs/hfsplus/xattr.h
+++ b/fs/hfsplus/xattr.h
@@ -21,22 +21,16 @@ extern const struct xattr_handler *hfsplus_xattr_handlers[];
int __hfsplus_setxattr(struct inode *inode, const char *name,
const void *value, size_t size, int flags);
-static inline int hfsplus_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
-{
- return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags);
-}
+int hfsplus_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags,
+ const char *prefix, size_t prefixlen);
ssize_t __hfsplus_getxattr(struct inode *inode, const char *name,
- void *value, size_t size);
-
-static inline ssize_t hfsplus_getxattr(struct dentry *dentry,
- const char *name,
- void *value,
- size_t size)
-{
- return __hfsplus_getxattr(dentry->d_inode, name, value, size);
-}
+ void *value, size_t size);
+
+ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
+ void *value, size_t size,
+ const char *prefix, size_t prefixlen);
ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index 6ec5e107691f..aacff00a9ff9 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -16,43 +16,17 @@
static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
- char *xattr_name;
- int res;
-
- if (!strcmp(name, ""))
- return -EINVAL;
-
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
- if (!xattr_name)
- return -ENOMEM;
- strcpy(xattr_name, XATTR_SECURITY_PREFIX);
- strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
-
- res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
- kfree(xattr_name);
- return res;
+ return hfsplus_getxattr(dentry, name, buffer, size,
+ XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN);
}
static int hfsplus_security_setxattr(struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags, int type)
{
- char *xattr_name;
- int res;
-
- if (!strcmp(name, ""))
- return -EINVAL;
-
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
- if (!xattr_name)
- return -ENOMEM;
- strcpy(xattr_name, XATTR_SECURITY_PREFIX);
- strcpy(xattr_name + XATTR_SECURITY_PREFIX_LEN, name);
-
- res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
- kfree(xattr_name);
- return res;
+ return hfsplus_setxattr(dentry, name, buffer, size, flags,
+ XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN);
}
static size_t hfsplus_security_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c
index 3c5f27e4746a..bcf65089b7f7 100644
--- a/fs/hfsplus/xattr_trusted.c
+++ b/fs/hfsplus/xattr_trusted.c
@@ -14,43 +14,16 @@
static int hfsplus_trusted_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
- char *xattr_name;
- int res;
-
- if (!strcmp(name, ""))
- return -EINVAL;
-
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
- if (!xattr_name)
- return -ENOMEM;
- strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
- strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
-
- res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
- kfree(xattr_name);
- return res;
+ return hfsplus_getxattr(dentry, name, buffer, size,
+ XATTR_TRUSTED_PREFIX,
+ XATTR_TRUSTED_PREFIX_LEN);
}
static int hfsplus_trusted_setxattr(struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags, int type)
{
- char *xattr_name;
- int res;
-
- if (!strcmp(name, ""))
- return -EINVAL;
-
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
- if (!xattr_name)
- return -ENOMEM;
- strcpy(xattr_name, XATTR_TRUSTED_PREFIX);
- strcpy(xattr_name + XATTR_TRUSTED_PREFIX_LEN, name);
-
- res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
- kfree(xattr_name);
- return res;
+ return hfsplus_setxattr(dentry, name, buffer, size, flags,
+ XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
}
static size_t hfsplus_trusted_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c
index 2b625a538b64..5aa0e6dc4a1e 100644
--- a/fs/hfsplus/xattr_user.c
+++ b/fs/hfsplus/xattr_user.c
@@ -14,43 +14,16 @@
static int hfsplus_user_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size, int type)
{
- char *xattr_name;
- int res;
- if (!strcmp(name, ""))
- return -EINVAL;
-
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
- if (!xattr_name)
- return -ENOMEM;
- strcpy(xattr_name, XATTR_USER_PREFIX);
- strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
-
- res = hfsplus_getxattr(dentry, xattr_name, buffer, size);
- kfree(xattr_name);
- return res;
+ return hfsplus_getxattr(dentry, name, buffer, size,
+ XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
}
static int hfsplus_user_setxattr(struct dentry *dentry, const char *name,
const void *buffer, size_t size, int flags, int type)
{
- char *xattr_name;
- int res;
-
- if (!strcmp(name, ""))
- return -EINVAL;
-
- xattr_name = kmalloc(NLS_MAX_CHARSET_SIZE * HFSPLUS_ATTR_MAX_STRLEN + 1,
- GFP_KERNEL);
- if (!xattr_name)
- return -ENOMEM;
- strcpy(xattr_name, XATTR_USER_PREFIX);
- strcpy(xattr_name + XATTR_USER_PREFIX_LEN, name);
-
- res = hfsplus_setxattr(dentry, xattr_name, buffer, size, flags);
- kfree(xattr_name);
- return res;
+ return hfsplus_setxattr(dentry, name, buffer, size, flags,
+ XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
}
static size_t hfsplus_user_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 4fcd40d6f308..91e19f9dffe5 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -66,7 +66,8 @@ extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
extern int access_file(char *path, int r, int w, int x);
extern int open_file(char *path, int r, int w, int append);
extern void *open_dir(char *path, int *err_out);
-extern char *read_dir(void *stream, unsigned long long *pos,
+extern void seek_dir(void *stream, unsigned long long pos);
+extern char *read_dir(void *stream, unsigned long long *pos_out,
unsigned long long *ino_out, int *len_out,
unsigned int *type_out);
extern void close_file(void *stream);
@@ -77,8 +78,7 @@ extern int write_file(int fd, unsigned long long *offset, const char *buf,
int len);
extern int lseek_file(int fd, long long offset, int whence);
extern int fsync_file(int fd, int datasync);
-extern int file_create(char *name, int ur, int uw, int ux, int gr,
- int gw, int gx, int or, int ow, int ox);
+extern int file_create(char *name, int mode);
extern int set_attr(const char *file, struct hostfs_iattr *attrs, int fd);
extern int make_symlink(const char *from, const char *to);
extern int unlink_file(const char *file);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fd62cae0fdcb..b83a0343378b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -24,6 +24,7 @@ struct hostfs_inode_info {
int fd;
fmode_t mode;
struct inode vfs_inode;
+ struct mutex open_mutex;
};
static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
@@ -92,16 +93,22 @@ static char *__dentry_name(struct dentry *dentry, char *name)
__putname(name);
return NULL;
}
+
+ /*
+ * This function relies on the fact that dentry_path_raw() will place
+ * the path name at the end of the provided buffer.
+ */
+ BUG_ON(p + strlen(p) + 1 != name + PATH_MAX);
+
strlcpy(name, root, PATH_MAX);
if (len > p - name) {
__putname(name);
return NULL;
}
- if (p > name + len) {
- char *s = name + len;
- while ((*s++ = *p++) != '\0')
- ;
- }
+
+ if (p > name + len)
+ strcpy(name + len, p);
+
return name;
}
@@ -135,21 +142,19 @@ static char *follow_link(char *link)
int len, n;
char *name, *resolved, *end;
- len = 64;
- while (1) {
+ name = __getname();
+ if (!name) {
n = -ENOMEM;
- name = kmalloc(len, GFP_KERNEL);
- if (name == NULL)
- goto out;
-
- n = hostfs_do_readlink(link, name, len);
- if (n < len)
- break;
- len *= 2;
- kfree(name);
+ goto out_free;
}
+
+ n = hostfs_do_readlink(link, name, PATH_MAX);
if (n < 0)
goto out_free;
+ else if (n == PATH_MAX) {
+ n = -E2BIG;
+ goto out_free;
+ }
if (*name == '/')
return name;
@@ -168,13 +173,12 @@ static char *follow_link(char *link)
}
sprintf(resolved, "%s%s", link, name);
- kfree(name);
+ __putname(name);
kfree(link);
return resolved;
out_free:
- kfree(name);
- out:
+ __putname(name);
return ERR_PTR(n);
}
@@ -225,6 +229,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
hi->fd = -1;
hi->mode = 0;
inode_init_once(&hi->vfs_inode);
+ mutex_init(&hi->open_mutex);
return &hi->vfs_inode;
}
@@ -257,6 +262,9 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
if (strlen(root_path) > offset)
seq_printf(seq, ",%s", root_path + offset);
+ if (append)
+ seq_puts(seq, ",append");
+
return 0;
}
@@ -284,6 +292,7 @@ static int hostfs_readdir(struct file *file, struct dir_context *ctx)
if (dir == NULL)
return -error;
next = ctx->pos;
+ seek_dir(dir, next);
while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) {
if (!dir_emit(ctx, name, len, ino, type))
break;
@@ -293,13 +302,12 @@ static int hostfs_readdir(struct file *file, struct dir_context *ctx)
return 0;
}
-static int hostfs_file_open(struct inode *ino, struct file *file)
+static int hostfs_open(struct inode *ino, struct file *file)
{
- static DEFINE_MUTEX(open_mutex);
char *name;
- fmode_t mode = 0;
+ fmode_t mode;
int err;
- int r = 0, w = 0, fd;
+ int r, w, fd;
mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
if ((mode & HOSTFS_I(ino)->mode) == mode)
@@ -308,12 +316,12 @@ static int hostfs_file_open(struct inode *ino, struct file *file)
mode |= HOSTFS_I(ino)->mode;
retry:
+ r = w = 0;
+
if (mode & FMODE_READ)
r = 1;
if (mode & FMODE_WRITE)
- w = 1;
- if (w)
- r = 1;
+ r = w = 1;
name = dentry_name(file->f_path.dentry);
if (name == NULL)
@@ -324,15 +332,16 @@ retry:
if (fd < 0)
return fd;
- mutex_lock(&open_mutex);
+ mutex_lock(&HOSTFS_I(ino)->open_mutex);
/* somebody else had handled it first? */
if ((mode & HOSTFS_I(ino)->mode) == mode) {
- mutex_unlock(&open_mutex);
+ mutex_unlock(&HOSTFS_I(ino)->open_mutex);
+ close_file(&fd);
return 0;
}
if ((mode | HOSTFS_I(ino)->mode) != mode) {
mode |= HOSTFS_I(ino)->mode;
- mutex_unlock(&open_mutex);
+ mutex_unlock(&HOSTFS_I(ino)->open_mutex);
close_file(&fd);
goto retry;
}
@@ -342,12 +351,12 @@ retry:
err = replace_file(fd, HOSTFS_I(ino)->fd);
close_file(&fd);
if (err < 0) {
- mutex_unlock(&open_mutex);
+ mutex_unlock(&HOSTFS_I(ino)->open_mutex);
return err;
}
}
HOSTFS_I(ino)->mode = mode;
- mutex_unlock(&open_mutex);
+ mutex_unlock(&HOSTFS_I(ino)->open_mutex);
return 0;
}
@@ -378,13 +387,11 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
static const struct file_operations hostfs_file_fops = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.splice_read = generic_file_splice_read,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
- .write = new_sync_write,
.mmap = generic_file_mmap,
- .open = hostfs_file_open,
+ .open = hostfs_open,
.release = hostfs_file_release,
.fsync = hostfs_fsync,
};
@@ -393,6 +400,8 @@ static const struct file_operations hostfs_dir_fops = {
.llseek = generic_file_llseek,
.iterate = hostfs_readdir,
.read = generic_read_dir,
+ .open = hostfs_open,
+ .fsync = hostfs_fsync,
};
static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -400,7 +409,7 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
struct address_space *mapping = page->mapping;
struct inode *inode = mapping->host;
char *buffer;
- unsigned long long base;
+ loff_t base = page_offset(page);
int count = PAGE_CACHE_SIZE;
int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
int err;
@@ -409,7 +418,6 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
count = inode->i_size & (PAGE_CACHE_SIZE-1);
buffer = kmap(page);
- base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;
err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
if (err != count) {
@@ -434,26 +442,29 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
static int hostfs_readpage(struct file *file, struct page *page)
{
char *buffer;
- long long start;
- int err = 0;
+ loff_t start = page_offset(page);
+ int bytes_read, ret = 0;
- start = (long long) page->index << PAGE_CACHE_SHIFT;
buffer = kmap(page);
- err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
+ bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
PAGE_CACHE_SIZE);
- if (err < 0)
+ if (bytes_read < 0) {
+ ClearPageUptodate(page);
+ SetPageError(page);
+ ret = bytes_read;
goto out;
+ }
- memset(&buffer[err], 0, PAGE_CACHE_SIZE - err);
+ memset(buffer + bytes_read, 0, PAGE_CACHE_SIZE - bytes_read);
- flush_dcache_page(page);
+ ClearPageError(page);
SetPageUptodate(page);
- if (PageError(page)) ClearPageError(page);
- err = 0;
+
out:
+ flush_dcache_page(page);
kunmap(page);
unlock_page(page);
- return err;
+ return ret;
}
static int hostfs_write_begin(struct file *file, struct address_space *mapping,
@@ -530,11 +541,13 @@ static int read_name(struct inode *ino, char *name)
init_special_inode(ino, st.mode & S_IFMT, rdev);
ino->i_op = &hostfs_iops;
break;
-
- default:
+ case S_IFREG:
ino->i_op = &hostfs_iops;
ino->i_fop = &hostfs_file_fops;
ino->i_mapping->a_ops = &hostfs_aops;
+ break;
+ default:
+ return -EIO;
}
ino->i_ino = st.ino;
@@ -568,10 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (name == NULL)
goto out_put;
- fd = file_create(name,
- mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
- mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
- mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
+ fd = file_create(name, mode & S_IFMT);
if (fd < 0)
error = fd;
else
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 9765dab95cbd..9c1e0f019880 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -97,21 +97,27 @@ void *open_dir(char *path, int *err_out)
return dir;
}
-char *read_dir(void *stream, unsigned long long *pos,
+void seek_dir(void *stream, unsigned long long pos)
+{
+ DIR *dir = stream;
+
+ seekdir(dir, pos);
+}
+
+char *read_dir(void *stream, unsigned long long *pos_out,
unsigned long long *ino_out, int *len_out,
unsigned int *type_out)
{
DIR *dir = stream;
struct dirent *ent;
- seekdir(dir, *pos);
ent = readdir(dir);
if (ent == NULL)
return NULL;
*len_out = strlen(ent->d_name);
*ino_out = ent->d_ino;
*type_out = ent->d_type;
- *pos = telldir(dir);
+ *pos_out = ent->d_off;
return ent->d_name;
}
@@ -175,21 +181,10 @@ void close_dir(void *stream)
closedir(stream);
}
-int file_create(char *name, int ur, int uw, int ux, int gr,
- int gw, int gx, int or, int ow, int ox)
+int file_create(char *name, int mode)
{
- int mode, fd;
-
- mode = 0;
- mode |= ur ? S_IRUSR : 0;
- mode |= uw ? S_IWUSR : 0;
- mode |= ux ? S_IXUSR : 0;
- mode |= gr ? S_IRGRP : 0;
- mode |= gw ? S_IWGRP : 0;
- mode |= gx ? S_IXGRP : 0;
- mode |= or ? S_IROTH : 0;
- mode |= ow ? S_IWOTH : 0;
- mode |= ox ? S_IXOTH : 0;
+ int fd;
+
fd = open64(name, O_CREAT | O_RDWR, mode);
if (fd < 0)
return -errno;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 7f54e5f76cec..6d8cfe9b52d6 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -197,9 +197,7 @@ const struct address_space_operations hpfs_aops = {
const struct file_operations hpfs_file_ops =
{
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.release = hpfs_file_release,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c274aca8e8dc..2640d88b0e63 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -34,6 +34,7 @@
#include <linux/security.h>
#include <linux/magic.h>
#include <linux/migrate.h>
+#include <linux/uio.h>
#include <asm/uaccess.h>
@@ -47,9 +48,10 @@ struct hugetlbfs_config {
kuid_t uid;
kgid_t gid;
umode_t mode;
- long nr_blocks;
+ long max_hpages;
long nr_inodes;
struct hstate *hstate;
+ long min_hpages;
};
struct hugetlbfs_inode_info {
@@ -67,7 +69,7 @@ int sysctl_hugetlb_shm_group;
enum {
Opt_size, Opt_nr_inodes,
Opt_mode, Opt_uid, Opt_gid,
- Opt_pagesize,
+ Opt_pagesize, Opt_min_size,
Opt_err,
};
@@ -78,6 +80,7 @@ static const match_table_t tokens = {
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_pagesize, "pagesize=%s"},
+ {Opt_min_size, "min_size=%s"},
{Opt_err, NULL},
};
@@ -179,42 +182,33 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
#endif
-static int
+static size_t
hugetlbfs_read_actor(struct page *page, unsigned long offset,
- char __user *buf, unsigned long count,
- unsigned long size)
+ struct iov_iter *to, unsigned long size)
{
- char *kaddr;
- unsigned long left, copied = 0;
+ size_t copied = 0;
int i, chunksize;
- if (size > count)
- size = count;
-
/* Find which 4k chunk and offset with in that chunk */
i = offset >> PAGE_CACHE_SHIFT;
offset = offset & ~PAGE_CACHE_MASK;
while (size) {
+ size_t n;
chunksize = PAGE_CACHE_SIZE;
if (offset)
chunksize -= offset;
if (chunksize > size)
chunksize = size;
- kaddr = kmap(&page[i]);
- left = __copy_to_user(buf, kaddr + offset, chunksize);
- kunmap(&page[i]);
- if (left) {
- copied += (chunksize - left);
- break;
- }
+ n = copy_page_to_iter(&page[i], offset, chunksize, to);
+ copied += n;
+ if (n != chunksize)
+ return copied;
offset = 0;
size -= chunksize;
- buf += chunksize;
- copied += chunksize;
i++;
}
- return copied ? copied : -EFAULT;
+ return copied;
}
/*
@@ -222,39 +216,34 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
* data. Its *very* similar to do_generic_mapping_read(), we can't use that
* since it has PAGE_CACHE_SIZE assumptions.
*/
-static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
- size_t len, loff_t *ppos)
+static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- struct hstate *h = hstate_file(filp);
- struct address_space *mapping = filp->f_mapping;
+ struct file *file = iocb->ki_filp;
+ struct hstate *h = hstate_file(file);
+ struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- unsigned long index = *ppos >> huge_page_shift(h);
- unsigned long offset = *ppos & ~huge_page_mask(h);
+ unsigned long index = iocb->ki_pos >> huge_page_shift(h);
+ unsigned long offset = iocb->ki_pos & ~huge_page_mask(h);
unsigned long end_index;
loff_t isize;
ssize_t retval = 0;
- /* validate length */
- if (len == 0)
- goto out;
-
- for (;;) {
+ while (iov_iter_count(to)) {
struct page *page;
- unsigned long nr, ret;
- int ra;
+ size_t nr, copied;
/* nr is the maximum number of bytes to copy from this page */
nr = huge_page_size(h);
isize = i_size_read(inode);
if (!isize)
- goto out;
+ break;
end_index = (isize - 1) >> huge_page_shift(h);
- if (index >= end_index) {
- if (index > end_index)
- goto out;
+ if (index > end_index)
+ break;
+ if (index == end_index) {
nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
if (nr <= offset)
- goto out;
+ break;
}
nr = nr - offset;
@@ -265,39 +254,27 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
* We have a HOLE, zero out the user-buffer for the
* length of the hole or request.
*/
- ret = len < nr ? len : nr;
- if (clear_user(buf, ret))
- ra = -EFAULT;
- else
- ra = 0;
+ copied = iov_iter_zero(nr, to);
} else {
unlock_page(page);
/*
* We have the page, copy it to user space buffer.
*/
- ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
- ret = ra;
+ copied = hugetlbfs_read_actor(page, offset, to, nr);
page_cache_release(page);
}
- if (ra < 0) {
- if (retval == 0)
- retval = ra;
- goto out;
+ offset += copied;
+ retval += copied;
+ if (copied != nr && iov_iter_count(to)) {
+ if (!retval)
+ retval = -EFAULT;
+ break;
}
-
- offset += ret;
- retval += ret;
- len -= ret;
index += offset >> huge_page_shift(h);
offset &= ~huge_page_mask(h);
-
- /* short read or no more work */
- if ((ret != nr) || (len == 0))
- break;
}
-out:
- *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
+ iocb->ki_pos = ((loff_t)index << huge_page_shift(h)) + offset;
return retval;
}
@@ -319,7 +296,7 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
static void truncate_huge_page(struct page *page)
{
- cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
+ ClearPageDirty(page);
ClearPageUptodate(page);
delete_from_page_cache(page);
}
@@ -721,7 +698,7 @@ static void init_once(void *foo)
}
const struct file_operations hugetlbfs_file_operations = {
- .read = hugetlbfs_read,
+ .read_iter = hugetlbfs_read_iter,
.mmap = hugetlbfs_file_mmap,
.fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
@@ -754,14 +731,38 @@ static const struct super_operations hugetlbfs_ops = {
.show_options = generic_show_options,
};
+enum { NO_SIZE, SIZE_STD, SIZE_PERCENT };
+
+/*
+ * Convert size option passed from command line to number of huge pages
+ * in the pool specified by hstate. Size option could be in bytes
+ * (val_type == SIZE_STD) or percentage of the pool (val_type == SIZE_PERCENT).
+ */
+static long long
+hugetlbfs_size_to_hpages(struct hstate *h, unsigned long long size_opt,
+ int val_type)
+{
+ if (val_type == NO_SIZE)
+ return -1;
+
+ if (val_type == SIZE_PERCENT) {
+ size_opt <<= huge_page_shift(h);
+ size_opt *= h->max_huge_pages;
+ do_div(size_opt, 100);
+ }
+
+ size_opt >>= huge_page_shift(h);
+ return size_opt;
+}
+
static int
hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
{
char *p, *rest;
substring_t args[MAX_OPT_ARGS];
int option;
- unsigned long long size = 0;
- enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
+ unsigned long long max_size_opt = 0, min_size_opt = 0;
+ int max_val_type = NO_SIZE, min_val_type = NO_SIZE;
if (!options)
return 0;
@@ -799,10 +800,10 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
/* memparse() will accept a K/M/G without a digit */
if (!isdigit(*args[0].from))
goto bad_val;
- size = memparse(args[0].from, &rest);
- setsize = SIZE_STD;
+ max_size_opt = memparse(args[0].from, &rest);
+ max_val_type = SIZE_STD;
if (*rest == '%')
- setsize = SIZE_PERCENT;
+ max_val_type = SIZE_PERCENT;
break;
}
@@ -825,6 +826,17 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
break;
}
+ case Opt_min_size: {
+ /* memparse() will accept a K/M/G without a digit */
+ if (!isdigit(*args[0].from))
+ goto bad_val;
+ min_size_opt = memparse(args[0].from, &rest);
+ min_val_type = SIZE_STD;
+ if (*rest == '%')
+ min_val_type = SIZE_PERCENT;
+ break;
+ }
+
default:
pr_err("Bad mount option: \"%s\"\n", p);
return -EINVAL;
@@ -832,15 +844,22 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
}
}
- /* Do size after hstate is set up */
- if (setsize > NO_SIZE) {
- struct hstate *h = pconfig->hstate;
- if (setsize == SIZE_PERCENT) {
- size <<= huge_page_shift(h);
- size *= h->max_huge_pages;
- do_div(size, 100);
- }
- pconfig->nr_blocks = (size >> huge_page_shift(h));
+ /*
+ * Use huge page pool size (in hstate) to convert the size
+ * options to number of huge pages. If NO_SIZE, -1 is returned.
+ */
+ pconfig->max_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
+ max_size_opt, max_val_type);
+ pconfig->min_hpages = hugetlbfs_size_to_hpages(pconfig->hstate,
+ min_size_opt, min_val_type);
+
+ /*
+ * If max_size was specified, then min_size must be smaller
+ */
+ if (max_val_type > NO_SIZE &&
+ pconfig->min_hpages > pconfig->max_hpages) {
+ pr_err("minimum size can not be greater than maximum size\n");
+ return -EINVAL;
}
return 0;
@@ -859,12 +878,13 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
save_mount_options(sb, data);
- config.nr_blocks = -1; /* No limit on size by default */
+ config.max_hpages = -1; /* No limit on size by default */
config.nr_inodes = -1; /* No limit on number of inodes by default */
config.uid = current_fsuid();
config.gid = current_fsgid();
config.mode = 0755;
config.hstate = &default_hstate;
+ config.min_hpages = -1; /* No default minimum size */
ret = hugetlbfs_parse_options(data, &config);
if (ret)
return ret;
@@ -878,8 +898,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
sbinfo->max_inodes = config.nr_inodes;
sbinfo->free_inodes = config.nr_inodes;
sbinfo->spool = NULL;
- if (config.nr_blocks != -1) {
- sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
+ /*
+ * Allocate and initialize subpool if maximum or minimum size is
+ * specified. Any needed reservations (for minimim size) are taken
+ * taken when the subpool is created.
+ */
+ if (config.max_hpages != -1 || config.min_hpages != -1) {
+ sbinfo->spool = hugepage_new_subpool(config.hstate,
+ config.max_hpages,
+ config.min_hpages);
if (!sbinfo->spool)
goto out_free;
}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 64989ca9ba90..f509f62e12f6 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -51,9 +51,7 @@ const struct file_operations jffs2_file_operations =
{
.llseek = generic_file_llseek,
.open = generic_file_open,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.unlocked_ioctl=jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d279fb49fe9c..2eac55379239 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -195,7 +195,7 @@ static int do_verify_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_dat
/* unchecked xdatum is chained with c->xattr_unchecked */
list_del_init(&xd->xindex);
- dbg_xattr("success on verfying xdatum (xid=%u, version=%u)\n",
+ dbg_xattr("success on verifying xdatum (xid=%u, version=%u)\n",
xd->xid, xd->version);
return 0;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 10815f8dfd8b..ae46788b9723 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -151,8 +151,6 @@ const struct inode_operations jfs_file_inode_operations = {
const struct file_operations jfs_file_operations = {
.open = jfs_open,
.llseek = generic_file_llseek,
- .write = new_sync_write,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index bd3df1ca3c9b..070dc4b33544 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,8 +22,8 @@
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
+#include <linux/uio.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
@@ -330,8 +330,8 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
return generic_block_bmap(mapping, block, jfs_get_block);
}
-static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -339,13 +339,13 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block);
+ ret = blockdev_direct_IO(iocb, inode, iter, offset, jfs_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
*/
- if (unlikely((rw & WRITE) && ret < 0)) {
+ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
loff_t isize = i_size_read(inode);
loff_t end = offset + count;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 49ba7ff1bbb9..16a0922beb59 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -183,30 +183,23 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
#endif
-static void init_once(void *foo)
-{
- struct metapage *mp = (struct metapage *)foo;
-
- mp->lid = 0;
- mp->lsn = 0;
- mp->flag = 0;
- mp->data = NULL;
- mp->clsn = 0;
- mp->log = NULL;
- set_bit(META_free, &mp->flag);
- init_waitqueue_head(&mp->wait);
-}
-
static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
{
- return mempool_alloc(metapage_mempool, gfp_mask);
+ struct metapage *mp = mempool_alloc(metapage_mempool, gfp_mask);
+
+ if (mp) {
+ mp->lid = 0;
+ mp->lsn = 0;
+ mp->data = NULL;
+ mp->clsn = 0;
+ mp->log = NULL;
+ init_waitqueue_head(&mp->wait);
+ }
+ return mp;
}
static inline void free_metapage(struct metapage *mp)
{
- mp->flag = 0;
- set_bit(META_free, &mp->flag);
-
mempool_free(mp, metapage_mempool);
}
@@ -216,7 +209,7 @@ int __init metapage_init(void)
* Allocate the metapage structures
*/
metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
- 0, 0, init_once);
+ 0, 0, NULL);
if (metapage_cache == NULL)
return -ENOMEM;
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index a78beda85f68..337e9e51ac06 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -48,7 +48,6 @@ struct metapage {
/* metapage flag */
#define META_locked 0
-#define META_free 1
#define META_dirty 2
#define META_sync 3
#define META_discard 4
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5d30c56ae075..4cd9798f4948 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -102,7 +102,7 @@ void jfs_error(struct super_block *sb, const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- pr_err("ERROR: (device %s): %pf: %pV\n",
+ pr_err("ERROR: (device %s): %ps: %pV\n",
sb->s_id, __builtin_return_address(0), &vaf);
va_end(args);
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index b684e8a132e6..2bacb9988566 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
goto out_free;
}
+ of->event = atomic_read(&of->kn->attr.open->event);
ops = kernfs_ops(of->kn);
if (ops->read)
len = ops->read(of, buf, len, *ppos);
diff --git a/fs/locks.c b/fs/locks.c
index 365c82e1b3a9..653faabb07f4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -203,11 +203,11 @@ static struct kmem_cache *flctx_cache __read_mostly;
static struct kmem_cache *filelock_cache __read_mostly;
static struct file_lock_context *
-locks_get_lock_context(struct inode *inode)
+locks_get_lock_context(struct inode *inode, int type)
{
struct file_lock_context *new;
- if (likely(inode->i_flctx))
+ if (likely(inode->i_flctx) || type == F_UNLCK)
goto out;
new = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
@@ -223,14 +223,7 @@ locks_get_lock_context(struct inode *inode)
* Assign the pointer if it's not already assigned. If it is, then
* free the context we just allocated.
*/
- spin_lock(&inode->i_lock);
- if (likely(!inode->i_flctx)) {
- inode->i_flctx = new;
- new = NULL;
- }
- spin_unlock(&inode->i_lock);
-
- if (new)
+ if (cmpxchg(&inode->i_flctx, NULL, new))
kmem_cache_free(flctx_cache, new);
out:
return inode->i_flctx;
@@ -276,8 +269,10 @@ void locks_release_private(struct file_lock *fl)
}
if (fl->fl_lmops) {
- if (fl->fl_lmops->lm_put_owner)
- fl->fl_lmops->lm_put_owner(fl);
+ if (fl->fl_lmops->lm_put_owner) {
+ fl->fl_lmops->lm_put_owner(fl->fl_owner);
+ fl->fl_owner = NULL;
+ }
fl->fl_lmops = NULL;
}
}
@@ -333,7 +328,7 @@ void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
if (fl->fl_lmops) {
if (fl->fl_lmops->lm_get_owner)
- fl->fl_lmops->lm_get_owner(new, fl);
+ fl->fl_lmops->lm_get_owner(fl->fl_owner);
}
}
EXPORT_SYMBOL(locks_copy_conflock);
@@ -592,11 +587,15 @@ posix_owner_key(struct file_lock *fl)
static void locks_insert_global_blocked(struct file_lock *waiter)
{
+ lockdep_assert_held(&blocked_lock_lock);
+
hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
}
static void locks_delete_global_blocked(struct file_lock *waiter)
{
+ lockdep_assert_held(&blocked_lock_lock);
+
hash_del(&waiter->fl_link);
}
@@ -730,7 +729,7 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
/* POSIX locks owned by the same process do not conflict with
* each other.
*/
- if (!IS_POSIX(sys_fl) || posix_same_owner(caller_fl, sys_fl))
+ if (posix_same_owner(caller_fl, sys_fl))
return (0);
/* Check whether they overlap */
@@ -748,7 +747,7 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
/* FLOCK locks referring to the same filp do not conflict with
* each other.
*/
- if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file))
+ if (caller_fl->fl_file == sys_fl->fl_file)
return (0);
if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
return 0;
@@ -838,6 +837,8 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
{
int i = 0;
+ lockdep_assert_held(&blocked_lock_lock);
+
/*
* This deadlock detector can't reasonably detect deadlocks with
* FL_OFDLCK locks, since they aren't owned by a process, per-se.
@@ -871,9 +872,12 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
bool found = false;
LIST_HEAD(dispose);
- ctx = locks_get_lock_context(inode);
- if (!ctx)
- return -ENOMEM;
+ ctx = locks_get_lock_context(inode, request->fl_type);
+ if (!ctx) {
+ if (request->fl_type != F_UNLCK)
+ return -ENOMEM;
+ return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
+ }
if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
new_fl = locks_alloc_lock();
@@ -939,9 +943,9 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
bool added = false;
LIST_HEAD(dispose);
- ctx = locks_get_lock_context(inode);
+ ctx = locks_get_lock_context(inode, request->fl_type);
if (!ctx)
- return -ENOMEM;
+ return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
/*
* We may need two file_lock structures for this operation,
@@ -964,8 +968,6 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
*/
if (request->fl_type != F_UNLCK) {
list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
- if (!IS_POSIX(fl))
- continue;
if (!posix_locks_conflict(request, fl))
continue;
if (conflock)
@@ -1388,9 +1390,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker)
int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
{
int error = 0;
- struct file_lock *new_fl;
struct file_lock_context *ctx = inode->i_flctx;
- struct file_lock *fl;
+ struct file_lock *new_fl, *fl, *tmp;
unsigned long break_time;
int want_write = (mode & O_ACCMODE) != O_RDONLY;
LIST_HEAD(dispose);
@@ -1420,7 +1421,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
break_time++; /* so that 0 means no break time */
}
- list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+ list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
if (!leases_conflict(fl, new_fl))
continue;
if (want_write) {
@@ -1606,7 +1607,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
lease = *flp;
trace_generic_add_lease(inode, lease);
- ctx = locks_get_lock_context(inode);
+ /* Note that arg is never F_UNLCK here */
+ ctx = locks_get_lock_context(inode, arg);
if (!ctx)
return -ENOMEM;
@@ -1665,7 +1667,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
}
if (my_fl != NULL) {
- error = lease->fl_lmops->lm_change(my_fl, arg, &dispose);
+ lease = my_fl;
+ error = lease->fl_lmops->lm_change(lease, arg, &dispose);
if (error)
goto out;
goto out_setup;
@@ -1727,7 +1730,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
break;
}
}
- trace_generic_delete_lease(inode, fl);
+ trace_generic_delete_lease(inode, victim);
if (victim)
error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
spin_unlock(&ctx->flc_lock);
@@ -2555,15 +2558,10 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
: (fl->fl_type == F_WRLCK) ? "WRITE" : "READ ");
}
if (inode) {
-#ifdef WE_CAN_BREAK_LSLK_NOW
- seq_printf(f, "%d %s:%ld ", fl_pid,
- inode->i_sb->s_id, inode->i_ino);
-#else
- /* userspace relies on this representation of dev_t ;-( */
+ /* userspace relies on this representation of dev_t */
seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino);
-#endif
} else {
seq_printf(f, "%d <none>:0 ", fl_pid);
}
@@ -2592,6 +2590,44 @@ static int locks_show(struct seq_file *f, void *v)
return 0;
}
+static void __show_fd_locks(struct seq_file *f,
+ struct list_head *head, int *id,
+ struct file *filp, struct files_struct *files)
+{
+ struct file_lock *fl;
+
+ list_for_each_entry(fl, head, fl_list) {
+
+ if (filp != fl->fl_file)
+ continue;
+ if (fl->fl_owner != files &&
+ fl->fl_owner != filp)
+ continue;
+
+ (*id)++;
+ seq_puts(f, "lock:\t");
+ lock_get_status(f, fl, *id, "");
+ }
+}
+
+void show_fd_locks(struct seq_file *f,
+ struct file *filp, struct files_struct *files)
+{
+ struct inode *inode = file_inode(filp);
+ struct file_lock_context *ctx;
+ int id = 0;
+
+ ctx = inode->i_flctx;
+ if (!ctx)
+ return;
+
+ spin_lock(&ctx->flc_lock);
+ __show_fd_locks(f, &ctx->flc_flock, &id, filp, files);
+ __show_fd_locks(f, &ctx->flc_posix, &id, filp, files);
+ __show_fd_locks(f, &ctx->flc_lease, &id, filp, files);
+ spin_unlock(&ctx->flc_lock);
+}
+
static void *locks_start(struct seq_file *f, loff_t *pos)
__acquires(&blocked_lock_lock)
{
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 8538752df2f6..b2c13f739ffa 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -271,8 +271,6 @@ const struct file_operations logfs_reg_fops = {
.llseek = generic_file_llseek,
.mmap = generic_file_readonly_mmap,
.open = generic_file_open,
- .read = new_sync_read,
- .write = new_sync_write,
};
const struct address_space_operations logfs_reg_aops = {
diff --git a/fs/minix/file.c b/fs/minix/file.c
index a967de085ac0..6d63e27ec961 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -14,9 +14,7 @@
*/
const struct file_operations minix_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
diff --git a/fs/namei.c b/fs/namei.c
index c83145af4bfc..ffab2e06e147 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -119,15 +119,14 @@
* PATH_MAX includes the nul terminator --RR.
*/
-#define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename))
+#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))
struct filename *
getname_flags(const char __user *filename, int flags, int *empty)
{
- struct filename *result, *err;
- int len;
- long max;
+ struct filename *result;
char *kname;
+ int len;
result = audit_reusename(filename);
if (result)
@@ -136,22 +135,18 @@ getname_flags(const char __user *filename, int flags, int *empty)
result = __getname();
if (unlikely(!result))
return ERR_PTR(-ENOMEM);
- result->refcnt = 1;
/*
* First, try to embed the struct filename inside the names_cache
* allocation
*/
- kname = (char *)result + sizeof(*result);
+ kname = (char *)result->iname;
result->name = kname;
- result->separate = false;
- max = EMBEDDED_NAME_MAX;
-recopy:
- len = strncpy_from_user(kname, filename, max);
+ len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
if (unlikely(len < 0)) {
- err = ERR_PTR(len);
- goto error;
+ __putname(result);
+ return ERR_PTR(len);
}
/*
@@ -160,43 +155,49 @@ recopy:
* names_cache allocation for the pathname, and re-do the copy from
* userland.
*/
- if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
+ if (unlikely(len == EMBEDDED_NAME_MAX)) {
+ const size_t size = offsetof(struct filename, iname[1]);
kname = (char *)result;
- result = kzalloc(sizeof(*result), GFP_KERNEL);
- if (!result) {
- err = ERR_PTR(-ENOMEM);
- result = (struct filename *)kname;
- goto error;
+ /*
+ * size is chosen that way we to guarantee that
+ * result->iname[0] is within the same object and that
+ * kname can't be equal to result->iname, no matter what.
+ */
+ result = kzalloc(size, GFP_KERNEL);
+ if (unlikely(!result)) {
+ __putname(kname);
+ return ERR_PTR(-ENOMEM);
}
result->name = kname;
- result->separate = true;
- result->refcnt = 1;
- max = PATH_MAX;
- goto recopy;
+ len = strncpy_from_user(kname, filename, PATH_MAX);
+ if (unlikely(len < 0)) {
+ __putname(kname);
+ kfree(result);
+ return ERR_PTR(len);
+ }
+ if (unlikely(len == PATH_MAX)) {
+ __putname(kname);
+ kfree(result);
+ return ERR_PTR(-ENAMETOOLONG);
+ }
}
+ result->refcnt = 1;
/* The empty path is special. */
if (unlikely(!len)) {
if (empty)
*empty = 1;
- err = ERR_PTR(-ENOENT);
- if (!(flags & LOOKUP_EMPTY))
- goto error;
+ if (!(flags & LOOKUP_EMPTY)) {
+ putname(result);
+ return ERR_PTR(-ENOENT);
+ }
}
- err = ERR_PTR(-ENAMETOOLONG);
- if (unlikely(len >= PATH_MAX))
- goto error;
-
result->uptr = filename;
result->aname = NULL;
audit_getname(result);
return result;
-
-error:
- putname(result);
- return err;
}
struct filename *
@@ -216,8 +217,7 @@ getname_kernel(const char * filename)
return ERR_PTR(-ENOMEM);
if (len <= EMBEDDED_NAME_MAX) {
- result->name = (char *)(result) + sizeof(*result);
- result->separate = false;
+ result->name = (char *)result->iname;
} else if (len <= PATH_MAX) {
struct filename *tmp;
@@ -227,7 +227,6 @@ getname_kernel(const char * filename)
return ERR_PTR(-ENOMEM);
}
tmp->name = (char *)result;
- tmp->separate = true;
result = tmp;
} else {
__putname(result);
@@ -249,7 +248,7 @@ void putname(struct filename *name)
if (--name->refcnt > 0)
return;
- if (name->separate) {
+ if (name->name != name->iname) {
__putname(name->name);
kfree(name);
} else
@@ -1586,7 +1585,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
inode = path->dentry->d_inode;
}
err = -ENOENT;
- if (!inode || d_is_negative(path->dentry))
+ if (d_is_negative(path->dentry))
goto out_path_put;
if (should_follow_link(path->dentry, follow)) {
@@ -1851,10 +1850,11 @@ static int link_path_walk(const char *name, struct nameidata *nd)
return err;
}
-static int path_init(int dfd, const char *name, unsigned int flags,
+static int path_init(int dfd, const struct filename *name, unsigned int flags,
struct nameidata *nd)
{
int retval = 0;
+ const char *s = name->name;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
@@ -1863,7 +1863,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
if (flags & LOOKUP_ROOT) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
- if (*name) {
+ if (*s) {
if (!d_can_lookup(root))
return -ENOTDIR;
retval = inode_permission(inode, MAY_EXEC);
@@ -1885,7 +1885,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->root.mnt = NULL;
nd->m_seq = read_seqbegin(&mount_lock);
- if (*name=='/') {
+ if (*s == '/') {
if (flags & LOOKUP_RCU) {
rcu_read_lock();
nd->seq = set_root_rcu(nd);
@@ -1919,7 +1919,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
dentry = f.file->f_path.dentry;
- if (*name) {
+ if (*s) {
if (!d_can_lookup(dentry)) {
fdput(f);
return -ENOTDIR;
@@ -1949,7 +1949,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
return -ECHILD;
done:
current->total_link_count = 0;
- return link_path_walk(name, nd);
+ return link_path_walk(s, nd);
}
static void path_cleanup(struct nameidata *nd)
@@ -1972,7 +1972,7 @@ static inline int lookup_last(struct nameidata *nd, struct path *path)
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int path_lookupat(int dfd, const char *name,
+static int path_lookupat(int dfd, const struct filename *name,
unsigned int flags, struct nameidata *nd)
{
struct path path;
@@ -2027,31 +2027,17 @@ static int path_lookupat(int dfd, const char *name,
static int filename_lookup(int dfd, struct filename *name,
unsigned int flags, struct nameidata *nd)
{
- int retval = path_lookupat(dfd, name->name, flags | LOOKUP_RCU, nd);
+ int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
if (unlikely(retval == -ECHILD))
- retval = path_lookupat(dfd, name->name, flags, nd);
+ retval = path_lookupat(dfd, name, flags, nd);
if (unlikely(retval == -ESTALE))
- retval = path_lookupat(dfd, name->name,
- flags | LOOKUP_REVAL, nd);
+ retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
if (likely(!retval))
audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT);
return retval;
}
-static int do_path_lookup(int dfd, const char *name,
- unsigned int flags, struct nameidata *nd)
-{
- struct filename *filename = getname_kernel(name);
- int retval = PTR_ERR(filename);
-
- if (!IS_ERR(filename)) {
- retval = filename_lookup(dfd, filename, flags, nd);
- putname(filename);
- }
- return retval;
-}
-
/* does lookup, returns the object with parent locked */
struct dentry *kern_path_locked(const char *name, struct path *path)
{
@@ -2089,9 +2075,15 @@ out:
int kern_path(const char *name, unsigned int flags, struct path *path)
{
struct nameidata nd;
- int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
- if (!res)
- *path = nd.path;
+ struct filename *filename = getname_kernel(name);
+ int res = PTR_ERR(filename);
+
+ if (!IS_ERR(filename)) {
+ res = filename_lookup(AT_FDCWD, filename, flags, &nd);
+ putname(filename);
+ if (!res)
+ *path = nd.path;
+ }
return res;
}
EXPORT_SYMBOL(kern_path);
@@ -2108,15 +2100,22 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
const char *name, unsigned int flags,
struct path *path)
{
- struct nameidata nd;
- int err;
- nd.root.dentry = dentry;
- nd.root.mnt = mnt;
+ struct filename *filename = getname_kernel(name);
+ int err = PTR_ERR(filename);
+
BUG_ON(flags & LOOKUP_PARENT);
- /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
- err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
- if (!err)
- *path = nd.path;
+
+ /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */
+ if (!IS_ERR(filename)) {
+ struct nameidata nd;
+ nd.root.dentry = dentry;
+ nd.root.mnt = mnt;
+ err = filename_lookup(AT_FDCWD, filename,
+ flags | LOOKUP_ROOT, &nd);
+ if (!err)
+ *path = nd.path;
+ putname(filename);
+ }
return err;
}
EXPORT_SYMBOL(vfs_path_lookup);
@@ -2138,9 +2137,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
* @len: maximum length @len should be interpreted to
*
* Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code. Also note that by using this function the
- * nameidata argument is passed to the filesystem methods and a filesystem
- * using this helper needs to be prepared for that.
+ * not be called by generic code.
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
@@ -2313,7 +2310,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
mutex_unlock(&dir->d_inode->i_mutex);
done:
- if (!dentry->d_inode || d_is_negative(dentry)) {
+ if (d_is_negative(dentry)) {
error = -ENOENT;
dput(dentry);
goto out;
@@ -2341,7 +2338,8 @@ out:
* Returns 0 and "path" will be valid on success; Returns error otherwise.
*/
static int
-path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
+path_mountpoint(int dfd, const struct filename *name, struct path *path,
+ unsigned int flags)
{
struct nameidata nd;
int err;
@@ -2370,20 +2368,20 @@ out:
}
static int
-filename_mountpoint(int dfd, struct filename *s, struct path *path,
+filename_mountpoint(int dfd, struct filename *name, struct path *path,
unsigned int flags)
{
int error;
- if (IS_ERR(s))
- return PTR_ERR(s);
- error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+ if (IS_ERR(name))
+ return PTR_ERR(name);
+ error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU);
if (unlikely(error == -ECHILD))
- error = path_mountpoint(dfd, s->name, path, flags);
+ error = path_mountpoint(dfd, name, path, flags);
if (unlikely(error == -ESTALE))
- error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+ error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL);
if (likely(!error))
- audit_inode(s, path->dentry, 0);
- putname(s);
+ audit_inode(name, path->dentry, 0);
+ putname(name);
return error;
}
@@ -3040,7 +3038,7 @@ retry_lookup:
finish_lookup:
/* we _can_ be in RCU mode here */
error = -ENOENT;
- if (!inode || d_is_negative(path->dentry)) {
+ if (d_is_negative(path->dentry)) {
path_to_nameidata(path, nd);
goto out;
}
@@ -3079,7 +3077,7 @@ finish_open:
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
goto out;
- if (!S_ISREG(nd->inode->i_mode))
+ if (!d_is_reg(nd->path.dentry))
will_truncate = false;
if (will_truncate) {
@@ -3156,7 +3154,7 @@ static int do_tmpfile(int dfd, struct filename *pathname,
static const struct qstr name = QSTR_INIT("/", 1);
struct dentry *dentry, *child;
struct inode *dir;
- int error = path_lookupat(dfd, pathname->name,
+ int error = path_lookupat(dfd, pathname,
flags | LOOKUP_DIRECTORY, nd);
if (unlikely(error))
return error;
@@ -3229,7 +3227,7 @@ static struct file *path_openat(int dfd, struct filename *pathname,
goto out;
}
- error = path_init(dfd, pathname->name, flags, nd);
+ error = path_init(dfd, pathname, flags, nd);
if (unlikely(error))
goto out;
diff --git a/fs/namespace.c b/fs/namespace.c
index 82ef1405260e..1f4f9dac6e5a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -632,14 +632,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
*/
struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
{
- struct mount *p, *res;
- res = p = __lookup_mnt(mnt, dentry);
+ struct mount *p, *res = NULL;
+ p = __lookup_mnt(mnt, dentry);
if (!p)
goto out;
+ if (!(p->mnt.mnt_flags & MNT_UMOUNT))
+ res = p;
hlist_for_each_entry_continue(p, mnt_hash) {
if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
break;
- res = p;
+ if (!(p->mnt.mnt_flags & MNT_UMOUNT))
+ res = p;
}
out:
return res;
@@ -795,10 +798,8 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
/*
* vfsmount lock must be held for write
*/
-static void detach_mnt(struct mount *mnt, struct path *old_path)
+static void unhash_mnt(struct mount *mnt)
{
- old_path->dentry = mnt->mnt_mountpoint;
- old_path->mnt = &mnt->mnt_parent->mnt;
mnt->mnt_parent = mnt;
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
list_del_init(&mnt->mnt_child);
@@ -811,6 +812,26 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
/*
* vfsmount lock must be held for write
*/
+static void detach_mnt(struct mount *mnt, struct path *old_path)
+{
+ old_path->dentry = mnt->mnt_mountpoint;
+ old_path->mnt = &mnt->mnt_parent->mnt;
+ unhash_mnt(mnt);
+}
+
+/*
+ * vfsmount lock must be held for write
+ */
+static void umount_mnt(struct mount *mnt)
+{
+ /* old mountpoint will be dropped when we can do that */
+ mnt->mnt_ex_mountpoint = mnt->mnt_mountpoint;
+ unhash_mnt(mnt);
+}
+
+/*
+ * vfsmount lock must be held for write
+ */
void mnt_set_mountpoint(struct mount *mnt,
struct mountpoint *mp,
struct mount *child_mnt)
@@ -1078,6 +1099,13 @@ static void mntput_no_expire(struct mount *mnt)
rcu_read_unlock();
list_del(&mnt->mnt_instance);
+
+ if (unlikely(!list_empty(&mnt->mnt_mounts))) {
+ struct mount *p, *tmp;
+ list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
+ umount_mnt(p);
+ }
+ }
unlock_mount_hash();
if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
@@ -1298,17 +1326,15 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static void namespace_unlock(void)
{
- struct hlist_head head = unmounted;
+ struct hlist_head head;
- if (likely(hlist_empty(&head))) {
- up_write(&namespace_sem);
- return;
- }
+ hlist_move_list(&unmounted, &head);
- head.first->pprev = &head.first;
- INIT_HLIST_HEAD(&unmounted);
up_write(&namespace_sem);
+ if (likely(hlist_empty(&head)))
+ return;
+
synchronize_rcu();
group_pin_kill(&head);
@@ -1319,49 +1345,63 @@ static inline void namespace_lock(void)
down_write(&namespace_sem);
}
+enum umount_tree_flags {
+ UMOUNT_SYNC = 1,
+ UMOUNT_PROPAGATE = 2,
+ UMOUNT_CONNECTED = 4,
+};
/*
* mount_lock must be held
* namespace_sem must be held for write
- * how = 0 => just this tree, don't propagate
- * how = 1 => propagate; we know that nobody else has reference to any victims
- * how = 2 => lazy umount
*/
-void umount_tree(struct mount *mnt, int how)
+static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
{
- HLIST_HEAD(tmp_list);
+ LIST_HEAD(tmp_list);
struct mount *p;
+ if (how & UMOUNT_PROPAGATE)
+ propagate_mount_unlock(mnt);
+
+ /* Gather the mounts to umount */
for (p = mnt; p; p = next_mnt(p, mnt)) {
- hlist_del_init_rcu(&p->mnt_hash);
- hlist_add_head(&p->mnt_hash, &tmp_list);
+ p->mnt.mnt_flags |= MNT_UMOUNT;
+ list_move(&p->mnt_list, &tmp_list);
}
- hlist_for_each_entry(p, &tmp_list, mnt_hash)
+ /* Hide the mounts from mnt_mounts */
+ list_for_each_entry(p, &tmp_list, mnt_list) {
list_del_init(&p->mnt_child);
+ }
- if (how)
+ /* Add propogated mounts to the tmp_list */
+ if (how & UMOUNT_PROPAGATE)
propagate_umount(&tmp_list);
- while (!hlist_empty(&tmp_list)) {
- p = hlist_entry(tmp_list.first, struct mount, mnt_hash);
- hlist_del_init_rcu(&p->mnt_hash);
+ while (!list_empty(&tmp_list)) {
+ bool disconnect;
+ p = list_first_entry(&tmp_list, struct mount, mnt_list);
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL;
- if (how < 2)
+ if (how & UMOUNT_SYNC)
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
- pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt, &unmounted);
+ disconnect = !(((how & UMOUNT_CONNECTED) &&
+ mnt_has_parent(p) &&
+ (p->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) ||
+ IS_MNT_LOCKED_AND_LAZY(p));
+
+ pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
+ disconnect ? &unmounted : NULL);
if (mnt_has_parent(p)) {
- hlist_del_init(&p->mnt_mp_list);
- put_mountpoint(p->mnt_mp);
mnt_add_count(p->mnt_parent, -1);
- /* old mountpoint will be dropped when we can do that */
- p->mnt_ex_mountpoint = p->mnt_mountpoint;
- p->mnt_mountpoint = p->mnt.mnt_root;
- p->mnt_parent = p;
- p->mnt_mp = NULL;
+ if (!disconnect) {
+ /* Don't forget about p */
+ list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
+ } else {
+ umount_mnt(p);
+ }
}
change_mnt_propagation(p, MS_PRIVATE);
}
@@ -1447,14 +1487,14 @@ static int do_umount(struct mount *mnt, int flags)
if (flags & MNT_DETACH) {
if (!list_empty(&mnt->mnt_list))
- umount_tree(mnt, 2);
+ umount_tree(mnt, UMOUNT_PROPAGATE);
retval = 0;
} else {
shrink_submounts(mnt);
retval = -EBUSY;
if (!propagate_mount_busy(mnt, 2)) {
if (!list_empty(&mnt->mnt_list))
- umount_tree(mnt, 1);
+ umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
retval = 0;
}
}
@@ -1480,13 +1520,20 @@ void __detach_mounts(struct dentry *dentry)
namespace_lock();
mp = lookup_mountpoint(dentry);
- if (!mp)
+ if (IS_ERR_OR_NULL(mp))
goto out_unlock;
lock_mount_hash();
while (!hlist_empty(&mp->m_list)) {
mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
- umount_tree(mnt, 2);
+ if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
+ struct mount *p, *tmp;
+ list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) {
+ hlist_add_head(&p->mnt_umount.s_list, &unmounted);
+ umount_mnt(p);
+ }
+ }
+ else umount_tree(mnt, UMOUNT_CONNECTED);
}
unlock_mount_hash();
put_mountpoint(mp);
@@ -1648,7 +1695,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
out:
if (res) {
lock_mount_hash();
- umount_tree(res, 0);
+ umount_tree(res, UMOUNT_SYNC);
unlock_mount_hash();
}
return q;
@@ -1660,8 +1707,11 @@ struct vfsmount *collect_mounts(struct path *path)
{
struct mount *tree;
namespace_lock();
- tree = copy_tree(real_mount(path->mnt), path->dentry,
- CL_COPY_ALL | CL_PRIVATE);
+ if (!check_mnt(real_mount(path->mnt)))
+ tree = ERR_PTR(-EINVAL);
+ else
+ tree = copy_tree(real_mount(path->mnt), path->dentry,
+ CL_COPY_ALL | CL_PRIVATE);
namespace_unlock();
if (IS_ERR(tree))
return ERR_CAST(tree);
@@ -1672,7 +1722,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
{
namespace_lock();
lock_mount_hash();
- umount_tree(real_mount(mnt), 0);
+ umount_tree(real_mount(mnt), UMOUNT_SYNC);
unlock_mount_hash();
namespace_unlock();
}
@@ -1855,7 +1905,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
out_cleanup_ids:
while (!hlist_empty(&tree_list)) {
child = hlist_entry(tree_list.first, struct mount, mnt_hash);
- umount_tree(child, 0);
+ umount_tree(child, UMOUNT_SYNC);
}
unlock_mount_hash();
cleanup_group_ids(source_mnt, NULL);
@@ -2035,7 +2085,7 @@ static int do_loopback(struct path *path, const char *old_name,
err = graft_tree(mnt, parent, mp);
if (err) {
lock_mount_hash();
- umount_tree(mnt, 0);
+ umount_tree(mnt, UMOUNT_SYNC);
unlock_mount_hash();
}
out2:
@@ -2406,7 +2456,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
while (!list_empty(&graveyard)) {
mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
touch_mnt_namespace(mnt->mnt_ns);
- umount_tree(mnt, 1);
+ umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
}
unlock_mount_hash();
namespace_unlock();
@@ -2477,7 +2527,7 @@ static void shrink_submounts(struct mount *mnt)
m = list_first_entry(&graveyard, struct mount,
mnt_expire);
touch_mnt_namespace(m->mnt_ns);
- umount_tree(m, 1);
+ umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
}
}
}
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 1dd7007f974d..011324ce9df2 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -98,30 +98,24 @@ out:
}
static ssize_t
-ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+ncp_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
size_t already_read = 0;
- off_t pos;
+ off_t pos = iocb->ki_pos;
size_t bufsize;
int error;
- void* freepage;
+ void *freepage;
size_t freelen;
ncp_dbg(1, "enter %pD2\n", file);
- pos = *ppos;
-
- if ((ssize_t) count < 0) {
- return -EINVAL;
- }
- if (!count)
+ if (!iov_iter_count(to))
return 0;
if (pos > inode->i_sb->s_maxbytes)
return 0;
- if (pos + count > inode->i_sb->s_maxbytes) {
- count = inode->i_sb->s_maxbytes - pos;
- }
+ iov_iter_truncate(to, inode->i_sb->s_maxbytes - pos);
error = ncp_make_open(inode, O_RDONLY);
if (error) {
@@ -138,31 +132,29 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
goto outrel;
error = 0;
/* First read in as much as possible for each bufsize. */
- while (already_read < count) {
+ while (iov_iter_count(to)) {
int read_this_time;
- size_t to_read = min_t(unsigned int,
+ size_t to_read = min_t(size_t,
bufsize - (pos % bufsize),
- count - already_read);
+ iov_iter_count(to));
error = ncp_read_bounce(NCP_SERVER(inode),
NCP_FINFO(inode)->file_handle,
- pos, to_read, buf, &read_this_time,
+ pos, to_read, to, &read_this_time,
freepage, freelen);
if (error) {
error = -EIO; /* NW errno -> Linux errno */
break;
}
pos += read_this_time;
- buf += read_this_time;
already_read += read_this_time;
- if (read_this_time != to_read) {
+ if (read_this_time != to_read)
break;
- }
}
vfree(freepage);
- *ppos = pos;
+ iocb->ki_pos = pos;
file_accessed(file);
@@ -173,42 +165,21 @@ outrel:
}
static ssize_t
-ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
+ struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
size_t already_written = 0;
- off_t pos;
size_t bufsize;
int errno;
- void* bouncebuffer;
+ void *bouncebuffer;
+ off_t pos;
ncp_dbg(1, "enter %pD2\n", file);
- if ((ssize_t) count < 0)
- return -EINVAL;
- pos = *ppos;
- if (file->f_flags & O_APPEND) {
- pos = i_size_read(inode);
- }
+ errno = generic_write_checks(iocb, from);
+ if (errno <= 0)
+ return errno;
- if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) {
- if (pos >= MAX_NON_LFS) {
- return -EFBIG;
- }
- if (count > MAX_NON_LFS - (u32)pos) {
- count = MAX_NON_LFS - (u32)pos;
- }
- }
- if (pos >= inode->i_sb->s_maxbytes) {
- if (count || pos > inode->i_sb->s_maxbytes) {
- return -EFBIG;
- }
- }
- if (pos + count > inode->i_sb->s_maxbytes) {
- count = inode->i_sb->s_maxbytes - pos;
- }
-
- if (!count)
- return 0;
errno = ncp_make_open(inode, O_WRONLY);
if (errno) {
ncp_dbg(1, "open failed, error=%d\n", errno);
@@ -216,8 +187,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
}
bufsize = NCP_SERVER(inode)->buffer_size;
- already_written = 0;
-
errno = file_update_time(file);
if (errno)
goto outrel;
@@ -227,13 +196,14 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
errno = -EIO; /* -ENOMEM */
goto outrel;
}
- while (already_written < count) {
+ pos = iocb->ki_pos;
+ while (iov_iter_count(from)) {
int written_this_time;
- size_t to_write = min_t(unsigned int,
+ size_t to_write = min_t(size_t,
bufsize - (pos % bufsize),
- count - already_written);
+ iov_iter_count(from));
- if (copy_from_user(bouncebuffer, buf, to_write)) {
+ if (copy_from_iter(bouncebuffer, to_write, from) != to_write) {
errno = -EFAULT;
break;
}
@@ -244,16 +214,14 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
break;
}
pos += written_this_time;
- buf += written_this_time;
already_written += written_this_time;
- if (written_this_time != to_write) {
+ if (written_this_time != to_write)
break;
- }
}
vfree(bouncebuffer);
- *ppos = pos;
+ iocb->ki_pos = pos;
if (pos > i_size_read(inode)) {
mutex_lock(&inode->i_mutex);
@@ -277,8 +245,8 @@ static int ncp_release(struct inode *inode, struct file *file) {
const struct file_operations ncp_file_operations =
{
.llseek = generic_file_llseek,
- .read = ncp_file_read,
- .write = ncp_file_write,
+ .read_iter = ncp_file_read_iter,
+ .write_iter = ncp_file_write_iter,
.unlocked_ioctl = ncp_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ncp_compat_ioctl,
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c
index 482387532f54..2b502a0d7941 100644
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -1001,8 +1001,8 @@ out:
*/
int
ncp_read_bounce(struct ncp_server *server, const char *file_id,
- __u32 offset, __u16 to_read, char __user *target, int *bytes_read,
- void* bounce, __u32 bufsize)
+ __u32 offset, __u16 to_read, struct iov_iter *to,
+ int *bytes_read, void *bounce, __u32 bufsize)
{
int result;
@@ -1025,7 +1025,7 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id,
(offset & 1);
*bytes_read = len;
result = 0;
- if (copy_to_user(target, source, len))
+ if (copy_to_iter(source, len, to) != len)
result = -EFAULT;
}
}
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 250e443a07f3..5233fbc1747a 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -53,7 +53,7 @@ static inline int ncp_read_bounce_size(__u32 size) {
return sizeof(struct ncp_reply_header) + 2 + 2 + size + 8;
};
int ncp_read_bounce(struct ncp_server *, const char *, __u32, __u16,
- char __user *, int *, void* bounce, __u32 bouncelen);
+ struct iov_iter *, int *, void *bounce, __u32 bouncelen);
int ncp_read_kernel(struct ncp_server *, const char *, __u32, __u16,
char *, int *);
int ncp_write_kernel(struct ncp_server *, const char *, __u32, __u16,
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index c7abc10279af..f31fd0dd92c6 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,6 +1,6 @@
config NFS_FS
tristate "NFS client support"
- depends on INET && FILE_LOCKING
+ depends on INET && FILE_LOCKING && MULTIUSER
select LOCKD
select SUNRPC
select NFS_ACL_SUPPORT if NFS_V3_ACL
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f9f4845db989..19874151e95c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -433,7 +433,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
static bool nfs_client_init_is_complete(const struct nfs_client *clp)
{
- return clp->cl_cons_state != NFS_CS_INITING;
+ return clp->cl_cons_state <= NFS_CS_READY;
}
int nfs_wait_client_init_complete(const struct nfs_client *clp)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index a1f0685b42ff..a6ad68865880 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -181,8 +181,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
clear_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags);
spin_unlock(&delegation->lock);
- put_rpccred(oldcred);
rcu_read_unlock();
+ put_rpccred(oldcred);
trace_nfs4_reclaim_delegation(inode, res->delegation_type);
} else {
/* We appear to have raced with a delegation return. */
@@ -370,7 +370,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation = NULL;
goto out;
}
- freeme = nfs_detach_delegation_locked(nfsi,
+ if (test_and_set_bit(NFS_DELEGATION_RETURNING,
+ &old_delegation->flags))
+ goto out;
+ freeme = nfs_detach_delegation_locked(nfsi,
old_delegation, clp);
if (freeme == NULL)
goto out;
@@ -433,6 +436,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
+ if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+ goto out;
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
@@ -444,6 +449,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
ret = true;
spin_unlock(&delegation->lock);
}
+out:
return ret;
}
@@ -471,14 +477,20 @@ restart:
super_list) {
if (!nfs_delegation_need_return(delegation))
continue;
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL)
+ if (!nfs_sb_active(server->super))
continue;
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL) {
+ rcu_read_unlock();
+ nfs_sb_deactive(server->super);
+ goto restart;
+ }
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
err = nfs_end_delegation_return(inode, delegation, 0);
iput(inode);
+ nfs_sb_deactive(server->super);
if (!err)
goto restart;
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
@@ -809,19 +821,30 @@ restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
+ if (test_bit(NFS_DELEGATION_RETURNING,
+ &delegation->flags))
+ continue;
if (test_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags) == 0)
continue;
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL)
+ if (!nfs_sb_active(server->super))
continue;
- delegation = nfs_detach_delegation(NFS_I(inode),
- delegation, server);
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL) {
+ rcu_read_unlock();
+ nfs_sb_deactive(server->super);
+ goto restart;
+ }
+ delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
-
- if (delegation != NULL)
- nfs_free_delegation(delegation);
+ if (delegation != NULL) {
+ delegation = nfs_detach_delegation(NFS_I(inode),
+ delegation, server);
+ if (delegation != NULL)
+ nfs_free_delegation(delegation);
+ }
iput(inode);
+ nfs_sb_deactive(server->super);
goto restart;
}
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9b0c55cb2a2e..c19e16f0b2d0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -408,14 +408,22 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
return 0;
}
+/* Match file and dirent using either filehandle or fileid
+ * Note: caller is responsible for checking the fsid
+ */
static
int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
{
+ struct nfs_inode *nfsi;
+
if (dentry->d_inode == NULL)
goto different;
- if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0)
- goto different;
- return 1;
+
+ nfsi = NFS_I(dentry->d_inode);
+ if (entry->fattr->fileid == nfsi->fileid)
+ return 1;
+ if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
+ return 1;
different:
return 0;
}
@@ -469,6 +477,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
struct inode *inode;
int status;
+ if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
+ return;
+ if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
+ return;
if (filename.name[0] == '.') {
if (filename.len == 1)
return;
@@ -479,6 +491,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
dentry = d_lookup(parent, &filename);
if (dentry != NULL) {
+ /* Is there a mountpoint here? If so, just exit */
+ if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
+ &entry->fattr->fsid))
+ goto out;
if (nfs_same_file(dentry, entry)) {
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
status = nfs_refresh_inode(dentry->d_inode, entry->fattr);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e907c8cf732e..682f65fe09b5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -240,7 +240,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
/**
* nfs_direct_IO - NFS address space operation for direct I/O
- * @rw: direction (read or write)
* @iocb: target I/O control block
* @iov: array of vectors that define I/O buffer
* @pos: offset in file to begin the operation
@@ -251,7 +250,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
* shunt off direct read and write requests before the VFS gets them,
* so this method is only ever called for swap.
*/
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
+ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -265,11 +264,11 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
return -EINVAL;
#else
- VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+ VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
- if (rw == READ)
+ if (iov_iter_rw(iter) == READ)
return nfs_file_direct_read(iocb, iter, pos);
- return nfs_file_direct_write(iocb, iter, pos);
+ return nfs_file_direct_write(iocb, iter);
#endif /* CONFIG_NFS_SWAP */
}
@@ -393,7 +392,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
- aio_complete(dreq->iocb, res, 0);
+ dreq->iocb->ki_complete(dreq->iocb, res, 0);
}
complete_all(&dreq->completion);
@@ -960,8 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
- loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t result = -EINVAL;
struct file *file = iocb->ki_filp;
@@ -969,25 +967,16 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
struct inode *inode = mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
- loff_t end;
- size_t count = iov_iter_count(iter);
- end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
-
- nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
+ loff_t pos, end;
dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
- file, count, (long long) pos);
+ file, iov_iter_count(iter), (long long) iocb->ki_pos);
- result = generic_write_checks(file, &pos, &count, 0);
- if (result)
- goto out;
+ nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES,
+ iov_iter_count(iter));
- result = -EINVAL;
- if ((ssize_t) count < 0)
- goto out;
- result = 0;
- if (!count)
- goto out;
+ pos = iocb->ki_pos;
+ end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
mutex_lock(&inode->i_mutex);
@@ -1002,7 +991,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
goto out_unlock;
}
- task_io_account_write(count);
+ task_io_account_write(iov_iter_count(iter));
result = -ENOMEM;
dreq = nfs_direct_req_alloc();
@@ -1010,7 +999,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
goto out_unlock;
dreq->inode = inode;
- dreq->bytes_left = count;
+ dreq->bytes_left = iov_iter_count(iter);
dreq->io_start = pos;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
l_ctx = nfs_get_lock_context(dreq->ctx);
@@ -1050,7 +1039,6 @@ out_release:
nfs_direct_req_release(dreq);
out_unlock:
mutex_unlock(&inode->i_mutex);
-out:
return result;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 94712fc781fa..c40e4363e746 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
-#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/swap.h>
@@ -171,14 +170,14 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t result;
- if (iocb->ki_filp->f_flags & O_DIRECT)
+ if (iocb->ki_flags & IOCB_DIRECT)
return nfs_file_direct_read(iocb, to, iocb->ki_pos);
dprintk("NFS: read(%pD2, %zu@%lu)\n",
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
- result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
+ result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
if (result > 0)
@@ -199,7 +198,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n",
filp, (unsigned long) count, (unsigned long long) *ppos);
- res = nfs_revalidate_mapping(inode, filp->f_mapping);
+ res = nfs_revalidate_mapping_protected(inode, filp->f_mapping);
if (!res) {
res = generic_file_splice_read(filp, ppos, pipe, count, flags);
if (res > 0)
@@ -372,6 +371,10 @@ start:
nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
return ret;
+ /*
+ * Wait for O_DIRECT to complete
+ */
+ nfs_inode_dio_wait(mapping->host);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
@@ -619,6 +622,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
/* make sure the cache has finished storing the page */
nfs_fscache_wait_on_page_write(NFS_I(inode), page);
+ wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
+
lock_page(page);
mapping = page_file_mapping(page);
if (mapping != inode->i_mapping)
@@ -668,17 +674,20 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
unsigned long written = 0;
ssize_t result;
size_t count = iov_iter_count(from);
- loff_t pos = iocb->ki_pos;
result = nfs_key_timeout_notify(file, inode);
if (result)
return result;
- if (file->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, from, pos);
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ result = generic_write_checks(iocb, from);
+ if (result <= 0)
+ return result;
+ return nfs_file_direct_write(iocb, from);
+ }
dprintk("NFS: write(%pD2, %zu@%Ld)\n",
- file, count, (long long) pos);
+ file, count, (long long) iocb->ki_pos);
result = -EBUSY;
if (IS_SWAPFILE(inode))
@@ -686,7 +695,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
/*
* O_APPEND implies that we must revalidate the file length.
*/
- if (file->f_flags & O_APPEND) {
+ if (iocb->ki_flags & IOCB_APPEND) {
result = nfs_revalidate_file_size(inode, file);
if (result)
goto out;
@@ -920,8 +929,6 @@ EXPORT_SYMBOL_GPL(nfs_flock);
const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = nfs_file_read,
.write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 83107be3dd01..d42dff6d5e98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
* This is a copy of the common vmtruncate, but with the locking
* corrected to take into account the fact that NFS requires
* inode->i_size to be updated under the inode->i_lock.
+ * Note: must be called with inode->i_lock held!
*/
static int nfs_vmtruncate(struct inode * inode, loff_t offset)
{
@@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
if (err)
goto out;
- spin_lock(&inode->i_lock);
i_size_write(inode, offset);
/* Optimisation */
if (offset == 0)
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&inode->i_lock);
truncate_pagecache(inode, offset);
+ spin_lock(&inode->i_lock);
out:
return err;
}
@@ -585,10 +586,15 @@ out:
* Note: we do this in the *proc.c in order to ensure that
* it works for things like exclusive creates too.
*/
-void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
+void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
+ struct nfs_fattr *fattr)
{
+ /* Barrier: bump the attribute generation count. */
+ nfs_fattr_set_barrier(fattr);
+
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
- spin_lock(&inode->i_lock);
if ((attr->ia_valid & ATTR_MODE) != 0) {
int mode = attr->ia_mode & S_IALLUGO;
mode |= inode->i_mode & ~S_IALLUGO;
@@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
inode->i_gid = attr->ia_gid;
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL);
- spin_unlock(&inode->i_lock);
}
if ((attr->ia_valid & ATTR_SIZE) != 0) {
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
+ nfs_update_inode(inode, fattr);
+ spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
@@ -1028,6 +1035,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
if (mapping->nrpages != 0) {
if (S_ISREG(inode->i_mode)) {
+ unmap_mapping_range(mapping, 0, 0, 0);
ret = nfs_sync_mapping(mapping);
if (ret < 0)
return ret;
@@ -1060,11 +1068,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode)
}
/**
- * nfs_revalidate_mapping - Revalidate the pagecache
+ * __nfs_revalidate_mapping - Revalidate the pagecache
* @inode - pointer to host inode
* @mapping - pointer to mapping
+ * @may_lock - take inode->i_mutex?
*/
-int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+static int __nfs_revalidate_mapping(struct inode *inode,
+ struct address_space *mapping,
+ bool may_lock)
{
struct nfs_inode *nfsi = NFS_I(inode);
unsigned long *bitlock = &nfsi->flags;
@@ -1113,7 +1124,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
spin_unlock(&inode->i_lock);
trace_nfs_invalidate_mapping_enter(inode);
- ret = nfs_invalidate_mapping(inode, mapping);
+ if (may_lock) {
+ mutex_lock(&inode->i_mutex);
+ ret = nfs_invalidate_mapping(inode, mapping);
+ mutex_unlock(&inode->i_mutex);
+ } else
+ ret = nfs_invalidate_mapping(inode, mapping);
trace_nfs_invalidate_mapping_exit(inode, ret);
clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
@@ -1123,6 +1139,29 @@ out:
return ret;
}
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+ return __nfs_revalidate_mapping(inode, mapping, false);
+}
+
+/**
+ * nfs_revalidate_mapping_protected - Revalidate the pagecache
+ * @inode - pointer to host inode
+ * @mapping - pointer to mapping
+ *
+ * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex
+ * while invalidating the mapping.
+ */
+int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping)
+{
+ return __nfs_revalidate_mapping(inode, mapping, true);
+}
+
static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -1231,13 +1270,6 @@ static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fat
return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
}
-static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
-{
- if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
- return 0;
- return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
-}
-
static atomic_long_t nfs_attr_generation_counter;
static unsigned long nfs_read_attr_generation_counter(void)
@@ -1249,6 +1281,7 @@ unsigned long nfs_inc_attr_generation_counter(void)
{
return atomic_long_inc_return(&nfs_attr_generation_counter);
}
+EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter);
void nfs_fattr_init(struct nfs_fattr *fattr)
{
@@ -1260,6 +1293,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
}
EXPORT_SYMBOL_GPL(nfs_fattr_init);
+/**
+ * nfs_fattr_set_barrier
+ * @fattr: attributes
+ *
+ * Used to set a barrier after an attribute was updated. This
+ * barrier ensures that older attributes from RPC calls that may
+ * have raced with our update cannot clobber these new values.
+ * Note that you are still responsible for ensuring that other
+ * operations which change the attribute on the server do not
+ * collide.
+ */
+void nfs_fattr_set_barrier(struct nfs_fattr *fattr)
+{
+ fattr->gencount = nfs_inc_attr_generation_counter();
+}
+
struct nfs_fattr *nfs_alloc_fattr(void)
{
struct nfs_fattr *fattr;
@@ -1370,7 +1419,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
nfs_ctime_need_update(inode, fattr) ||
- nfs_size_need_update(inode, fattr) ||
((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
}
@@ -1460,6 +1508,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
int status;
spin_lock(&inode->i_lock);
+ nfs_fattr_set_barrier(fattr);
status = nfs_post_op_update_inode_locked(inode, fattr);
spin_unlock(&inode->i_lock);
@@ -1468,7 +1517,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
/**
- * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache
* @inode - pointer to inode
* @fattr - updated attributes
*
@@ -1478,11 +1527,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
*
* This function is mainly designed to be used by the ->write_done() functions.
*/
-int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
{
int status;
- spin_lock(&inode->i_lock);
/* Don't do a WCC update if these attributes are already stale */
if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
!nfs_inode_attrs_need_update(inode, fattr)) {
@@ -1514,6 +1562,27 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
}
out_noforce:
status = nfs_post_op_update_inode_locked(inode, fattr);
+ return status;
+}
+
+/**
+ * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it. Fake up
+ * weak cache consistency data, if none exist.
+ *
+ * This function is mainly designed to be used by the ->write_done() functions.
+ */
+int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
+{
+ int status;
+
+ spin_lock(&inode->i_lock);
+ nfs_fattr_set_barrier(fattr);
+ status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
spin_unlock(&inode->i_lock);
return status;
}
@@ -1715,6 +1784,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
+ /* Set barrier to be more recent than all outstanding updates */
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
} else {
if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
@@ -1722,6 +1792,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
}
+ /* Set the barrier to be more recent than this fattr */
+ if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ nfsi->attr_gencount = fattr->gencount;
}
invalid &= ~NFS_INO_INVALID_ATTR;
/* Don't invalidate the data if we were to blame */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b802fb3a2d99..9e6475bc5ba2 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
struct nfs_commit_info *cinfo,
u32 ds_commit_idx);
int nfs_write_need_commit(struct nfs_pgio_header *);
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 78e557c3ab87..1f11d2533ee4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
- nfs_setattr_update_inode(inode, sattr);
+ nfs_setattr_update_inode(inode, sattr, fattr);
dprintk("NFS reply setattr: %d\n", status);
return status;
}
@@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+ nfs_writeback_update_inode(hdr);
return 0;
}
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 2a932fdc57cb..53852a4bd88b 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1987,6 +1987,11 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
+ if (entry->fattr->fileid != entry->ino) {
+ entry->fattr->mounted_on_fileid = entry->ino;
+ entry->fattr->valid |= NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
+ }
+
/* In fact, a post_op_fh3: */
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8646af9b11d2..86d6214ea022 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -621,6 +621,9 @@ int nfs41_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+ if (pos == new)
+ goto found;
+
if (pos->rpc_ops != new->rpc_ops)
continue;
@@ -639,10 +642,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
prev = pos;
status = nfs_wait_client_init_complete(pos);
- if (pos->cl_cons_state == NFS_CS_SESSION_INITING) {
- nfs4_schedule_lease_recovery(pos);
- status = nfs4_wait_clnt_recover(pos);
- }
spin_lock(&nn->nfs_client_lock);
if (status < 0)
break;
@@ -668,7 +667,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
*/
if (!nfs4_match_client_owner_id(pos, new))
continue;
-
+found:
atomic_inc(&pos->cl_count);
*result = pos;
status = 0;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8b46389c4c5b..0181cde1d102 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -170,8 +170,6 @@ const struct file_operations nfs4_file_operations = {
#else
.llseek = nfs_file_llseek,
#endif
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = nfs_file_read,
.write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 88180ac5ea0e..627f37c44456 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -901,6 +901,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
if (!cinfo->atomic || cinfo->before != dir->i_version)
nfs_force_lookup_revalidate(dir);
dir->i_version = cinfo->after;
+ nfsi->attr_gencount = nfs_inc_attr_generation_counter();
nfs_fscache_invalidate(dir);
spin_unlock(&dir->i_lock);
}
@@ -1552,6 +1553,9 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
opendata->o_arg.open_flags = 0;
opendata->o_arg.fmode = fmode;
+ opendata->o_arg.share_access = nfs4_map_atomic_open_share(
+ NFS_SB(opendata->dentry->d_sb),
+ fmode, 0);
memset(&opendata->o_res, 0, sizeof(opendata->o_res));
memset(&opendata->c_res, 0, sizeof(opendata->c_res));
nfs4_init_opendata_res(opendata);
@@ -2413,8 +2417,8 @@ static int _nfs4_do_open(struct inode *dir,
opendata->o_res.f_attr, sattr,
state, label, olabel);
if (status == 0) {
- nfs_setattr_update_inode(state->inode, sattr);
- nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr);
+ nfs_setattr_update_inode(state->inode, sattr,
+ opendata->o_res.f_attr);
nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
}
}
@@ -2651,7 +2655,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_EXPIRED:
if (!nfs4_stateid_match(&calldata->arg.stateid,
- &state->stateid)) {
+ &state->open_stateid)) {
rpc_restart_call_prepare(task);
goto out_release;
}
@@ -2687,7 +2691,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
- nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid);
+ nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid);
/* Calculate the change in open mode */
calldata->arg.fmode = 0;
if (state->n_rdwr == 0) {
@@ -3288,7 +3292,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label);
if (status == 0) {
- nfs_setattr_update_inode(inode, sattr);
+ nfs_setattr_update_inode(inode, sattr, fattr);
nfs_setsecurity(inode, fattr, label);
}
nfs4_label_free(label);
@@ -4234,7 +4238,7 @@ static int nfs4_write_done_cb(struct rpc_task *task,
}
if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), hdr->timestamp);
- nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
+ nfs_writeback_update_inode(hdr);
}
return 0;
}
@@ -6893,9 +6897,13 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
if (status == 0) {
clp->cl_clientid = res.clientid;
- clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
- if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R))
+ clp->cl_exchange_flags = res.flags;
+ /* Client ID is not confirmed */
+ if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) {
+ clear_bit(NFS4_SESSION_ESTABLISHED,
+ &clp->cl_session->session_state);
clp->cl_seqid = res.seqid;
+ }
kfree(clp->cl_serverowner);
clp->cl_serverowner = res.server_owner;
@@ -7227,6 +7235,9 @@ static void nfs4_update_session(struct nfs4_session *session,
struct nfs41_create_session_res *res)
{
nfs4_copy_sessionid(&session->sess_id, &res->sessionid);
+ /* Mark client id and session as being confirmed */
+ session->clp->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
+ set_bit(NFS4_SESSION_ESTABLISHED, &session->session_state);
session->flags = res->flags;
memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs));
if (res->flags & SESSION4_BACK_CHAN)
@@ -7322,8 +7333,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
dprintk("--> nfs4_proc_destroy_session\n");
/* session is still being setup */
- if (session->clp->cl_cons_state != NFS_CS_READY)
- return status;
+ if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state))
+ return 0;
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
trace_nfs4_destroy_session(session->clp, status);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index fc46c7455898..e3ea2c5324d6 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -70,6 +70,7 @@ struct nfs4_session {
enum nfs4_session_state {
NFS4_SESSION_INITING,
+ NFS4_SESSION_ESTABLISHED,
};
extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5ad908e9ce9c..f95e3b58bbc3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -346,9 +346,23 @@ int nfs41_discover_server_trunking(struct nfs_client *clp,
status = nfs4_proc_exchange_id(clp, cred);
if (status != NFS4_OK)
return status;
- set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
- return nfs41_walk_client_list(clp, result, cred);
+ status = nfs41_walk_client_list(clp, result, cred);
+ if (status < 0)
+ return status;
+ if (clp != *result)
+ return 0;
+
+ /* Purge state if the client id was established in a prior instance */
+ if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)
+ set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+ else
+ set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ nfs4_schedule_state_manager(clp);
+ status = nfs_wait_client_init_complete(clp);
+ if (status < 0)
+ nfs_put_client(clp);
+ return status;
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b09cc23d6f43..c63189acd052 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0)
- nfs_setattr_update_inode(inode, sattr);
+ nfs_setattr_update_inode(inode, sattr, fattr);
dprintk("NFS reply setattr: %d\n", status);
return status;
}
@@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = hdr->inode;
-
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
+ nfs_writeback_update_inode(hdr);
return 0;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 568ecf0a880f..b8f5c63f77b2 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -117,15 +117,15 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
static void nfs_readpage_release(struct nfs_page *req)
{
- struct inode *d_inode = req->wb_context->dentry->d_inode;
+ struct inode *inode = req->wb_context->dentry->d_inode;
- dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+ dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
(long long)req_offset(req));
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
if (PageUptodate(req->wb_page))
- nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+ nfs_readpage_to_fscache(inode, req->wb_page, 0);
unlock_page(req->wb_page);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 595d81e354d1..759931088094 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode)
return 0;
}
+static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
+ struct nfs_fattr *fattr)
+{
+ struct nfs_pgio_args *argp = &hdr->args;
+ struct nfs_pgio_res *resp = &hdr->res;
+
+ if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
+ return;
+ if (argp->offset + resp->count != fattr->size)
+ return;
+ if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode))
+ return;
+ /* Set attribute barrier */
+ nfs_fattr_set_barrier(fattr);
+}
+
+void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
+{
+ struct nfs_fattr *fattr = hdr->res.fattr;
+ struct inode *inode = hdr->inode;
+
+ if (fattr == NULL)
+ return;
+ spin_lock(&inode->i_lock);
+ nfs_writeback_check_extend(hdr, fattr);
+ nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);
+
/*
* This function is called when the WRITE call is complete.
*/
@@ -1846,11 +1876,6 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
* request from the inode / page_private pointer and
* release it */
nfs_inode_remove_request(req);
- /*
- * In case nfs_inode_remove_request has marked the
- * page as being dirty
- */
- cancel_dirty_page(page, PAGE_CACHE_SIZE);
nfs_unlock_and_release_request(req);
}
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 683bf718aead..fc2d108f5272 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -6,6 +6,7 @@ config NFSD
select SUNRPC
select EXPORTFS
select NFS_ACL_SUPPORT if NFSD_V2_ACL
+ depends on MULTIUSER
help
Choose Y here if you want to allow other computers to access
files residing on this system using Sun's Network File System
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index cdbc78c72542..03d647bf195d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
seg->offset = iomap.offset;
seg->length = iomap.length;
- dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es);
+ dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
return 0;
out_error:
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 9da89fddab33..9aa2796da90d 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
p = xdr_decode_hyper(p, &bex.foff);
if (bex.foff & (block_size - 1)) {
- dprintk("%s: unaligned offset %lld\n",
+ dprintk("%s: unaligned offset 0x%llx\n",
__func__, bex.foff);
goto fail;
}
p = xdr_decode_hyper(p, &bex.len);
if (bex.len & (block_size - 1)) {
- dprintk("%s: unaligned length %lld\n",
+ dprintk("%s: unaligned length 0x%llx\n",
__func__, bex.foff);
goto fail;
}
p = xdr_decode_hyper(p, &bex.soff);
if (bex.soff & (block_size - 1)) {
- dprintk("%s: unaligned disk offset %lld\n",
+ dprintk("%s: unaligned disk offset 0x%llx\n",
__func__, bex.soff);
goto fail;
}
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 3c1bfa155571..6904213a4363 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
{
struct super_block *sb = exp->ex_path.mnt->mnt_sb;
- if (exp->ex_flags & NFSEXP_NOPNFS)
+ if (!(exp->ex_flags & NFSEXP_PNFS))
return;
if (sb->s_export_op->get_uuid &&
@@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
list_move_tail(&lp->lo_perstate, reaplist);
return;
}
- end = seg->offset;
+ lo->offset = layout_end(seg);
} else {
/* retain the whole layout segment on a split. */
if (layout_end(seg) < end) {
dprintk("%s: split not supported\n", __func__);
return;
}
-
- lo->offset = layout_end(seg);
+ end = seg->offset;
}
layout_update_len(lo, end);
@@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp,
spin_lock(&clp->cl_lock);
list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
+ if (ls->ls_layout_type != lrp->lr_layout_type)
+ continue;
+
if (lrp->lr_return_type == RETURN_FSID &&
!fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
&cstate->current_fh.fh_handle))
@@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
- nfsd4_cb_layout_fail(ls);
+ trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
printk(KERN_WARNING
"nfsd: client %s failed to respond to layout recall. "
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d30bea8d0277..92b9d97aff4f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1237,8 +1237,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
gdp->gd_notify_types &= ops->notify_types;
- exp_put(exp);
out:
+ exp_put(exp);
return nfserr;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f6b2a09f793f..326a545ea7b2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1638,7 +1638,7 @@ __destroy_client(struct nfs4_client *clp)
nfs4_put_stid(&dp->dl_stid);
}
while (!list_empty(&clp->cl_revoked)) {
- dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
+ dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
nfs4_put_stid(&dp->dl_stid);
}
@@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
} else
nfs4_free_openowner(&oo->oo_owner);
spin_unlock(&clp->cl_lock);
- return oo;
+ return ret;
}
static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
@@ -4932,20 +4932,22 @@ nfs4_transform_lock_offset(struct file_lock *lock)
lock->fl_end = OFFSET_MAX;
}
-static void nfsd4_fl_get_owner(struct file_lock *dst, struct file_lock *src)
+static fl_owner_t
+nfsd4_fl_get_owner(fl_owner_t owner)
{
- struct nfs4_lockowner *lo = (struct nfs4_lockowner *)src->fl_owner;
- dst->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lo->lo_owner));
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
+
+ nfs4_get_stateowner(&lo->lo_owner);
+ return owner;
}
-static void nfsd4_fl_put_owner(struct file_lock *fl)
+static void
+nfsd4_fl_put_owner(fl_owner_t owner)
{
- struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
- if (lo) {
+ if (lo)
nfs4_put_stateowner(&lo->lo_owner);
- fl->fl_owner = NULL;
- }
}
static const struct lock_manager_operations nfsd_posix_mng_ops = {
@@ -5062,7 +5064,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
} else
nfs4_free_lockowner(&lo->lo_owner);
spin_unlock(&clp->cl_lock);
- return lo;
+ return ret;
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index df5e66caf100..5fb7e78169a6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
p = xdr_decode_hyper(p, &lgp->lg_seg.length);
p = xdr_decode_hyper(p, &lgp->lg_minlength);
- nfsd4_decode_stateid(argp, &lgp->lg_sid);
+
+ status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
+ if (status)
+ return status;
+
READ_BUF(4);
lgp->lg_maxcount = be32_to_cpup(p++);
@@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
p = xdr_decode_hyper(p, &lcp->lc_seg.length);
lcp->lc_reclaim = be32_to_cpup(p++);
- nfsd4_decode_stateid(argp, &lcp->lc_sid);
+
+ status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
+ if (status)
+ return status;
+
READ_BUF(4);
lcp->lc_newoffset = be32_to_cpup(p++);
if (lcp->lc_newoffset) {
@@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
READ_BUF(16);
p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
p = xdr_decode_hyper(p, &lrp->lr_seg.length);
- nfsd4_decode_stateid(argp, &lrp->lr_sid);
+
+ status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
+ if (status)
+ return status;
+
READ_BUF(4);
lrp->lrf_body_len = be32_to_cpup(p++);
if (lrp->lrf_body_len > 0) {
@@ -4123,7 +4135,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
*p++ = cpu_to_be32(lrp->lrs_present);
if (lrp->lrs_present)
- nfsd4_encode_stateid(xdr, &lrp->lr_sid);
+ return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
return nfs_ok;
}
#endif /* CONFIG_NFSD_PNFS */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 83a9694ec485..46ec934f5dee 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void)
{
unsigned int hashsize;
unsigned int i;
+ int status = 0;
max_drc_entries = nfsd_cache_size_limit();
atomic_set(&num_drc_entries, 0);
hashsize = nfsd_hashsize(max_drc_entries);
maskbits = ilog2(hashsize);
- register_shrinker(&nfsd_reply_cache_shrinker);
+ status = register_shrinker(&nfsd_reply_cache_shrinker);
+ if (status)
+ return status;
+
drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
0, 0, NULL);
if (!drc_slab)
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 741fd02e0444..8df0f3b7839b 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -405,13 +405,14 @@ nilfs_palloc_rest_groups_in_desc_block(const struct inode *inode,
static int nilfs_palloc_count_desc_blocks(struct inode *inode,
unsigned long *desc_blocks)
{
- unsigned long blknum;
+ __u64 blknum;
int ret;
ret = nilfs_bmap_last_key(NILFS_I(inode)->i_bmap, &blknum);
if (likely(!ret))
*desc_blocks = DIV_ROUND_UP(
- blknum, NILFS_MDT(inode)->mi_blocks_per_desc_block);
+ (unsigned long)blknum,
+ NILFS_MDT(inode)->mi_blocks_per_desc_block);
return ret;
}
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index aadbd0b5e3e8..27f75bcbeb30 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -152,9 +152,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
*
* %-EEXIST - A record associated with @key already exist.
*/
-int nilfs_bmap_insert(struct nilfs_bmap *bmap,
- unsigned long key,
- unsigned long rec)
+int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec)
{
int ret;
@@ -191,19 +189,47 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
return bmap->b_ops->bop_delete(bmap, key);
}
-int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
+/**
+ * nilfs_bmap_seek_key - seek a valid entry and return its key
+ * @bmap: bmap struct
+ * @start: start key number
+ * @keyp: place to store valid key
+ *
+ * Description: nilfs_bmap_seek_key() seeks a valid key on @bmap
+ * starting from @start, and stores it to @keyp if found.
+ *
+ * Return Value: On success, 0 is returned. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOENT - No valid entry was found
+ */
+int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp)
{
- __u64 lastkey;
int ret;
down_read(&bmap->b_sem);
- ret = bmap->b_ops->bop_last_key(bmap, &lastkey);
+ ret = bmap->b_ops->bop_seek_key(bmap, start, keyp);
+ up_read(&bmap->b_sem);
+
+ if (ret < 0)
+ ret = nilfs_bmap_convert_error(bmap, __func__, ret);
+ return ret;
+}
+
+int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp)
+{
+ int ret;
+
+ down_read(&bmap->b_sem);
+ ret = bmap->b_ops->bop_last_key(bmap, keyp);
up_read(&bmap->b_sem);
if (ret < 0)
ret = nilfs_bmap_convert_error(bmap, __func__, ret);
- else
- *key = lastkey;
return ret;
}
@@ -224,7 +250,7 @@ int nilfs_bmap_last_key(struct nilfs_bmap *bmap, unsigned long *key)
*
* %-ENOENT - A record associated with @key does not exist.
*/
-int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
+int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key)
{
int ret;
@@ -235,7 +261,7 @@ int nilfs_bmap_delete(struct nilfs_bmap *bmap, unsigned long key)
return nilfs_bmap_convert_error(bmap, __func__, ret);
}
-static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
+static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, __u64 key)
{
__u64 lastkey;
int ret;
@@ -276,7 +302,7 @@ static int nilfs_bmap_do_truncate(struct nilfs_bmap *bmap, unsigned long key)
*
* %-ENOMEM - Insufficient amount of memory available.
*/
-int nilfs_bmap_truncate(struct nilfs_bmap *bmap, unsigned long key)
+int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key)
{
int ret;
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index b89e68076adc..bfa817ce40b3 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -76,8 +76,10 @@ struct nilfs_bmap_operations {
union nilfs_binfo *);
int (*bop_mark)(struct nilfs_bmap *, __u64, int);
- /* The following functions are internal use only. */
+ int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *);
int (*bop_last_key)(const struct nilfs_bmap *, __u64 *);
+
+ /* The following functions are internal use only. */
int (*bop_check_insert)(const struct nilfs_bmap *, __u64);
int (*bop_check_delete)(struct nilfs_bmap *, __u64);
int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int);
@@ -153,10 +155,11 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
-int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
-int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
-int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *);
-int nilfs_bmap_truncate(struct nilfs_bmap *, unsigned long);
+int nilfs_bmap_insert(struct nilfs_bmap *bmap, __u64 key, unsigned long rec);
+int nilfs_bmap_delete(struct nilfs_bmap *bmap, __u64 key);
+int nilfs_bmap_seek_key(struct nilfs_bmap *bmap, __u64 start, __u64 *keyp);
+int nilfs_bmap_last_key(struct nilfs_bmap *bmap, __u64 *keyp);
+int nilfs_bmap_truncate(struct nilfs_bmap *bmap, __u64 key);
void nilfs_bmap_clear(struct nilfs_bmap *);
int nilfs_bmap_propagate(struct nilfs_bmap *, struct buffer_head *);
void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *, struct list_head *);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b2e3ff347620..059f37137f9a 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -31,6 +31,8 @@
#include "alloc.h"
#include "dat.h"
+static void __nilfs_btree_init(struct nilfs_bmap *bmap);
+
static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
{
struct nilfs_btree_path *path;
@@ -368,6 +370,34 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
return ret;
}
+/**
+ * nilfs_btree_root_broken - verify consistency of btree root node
+ * @node: btree root node to be examined
+ * @ino: inode number
+ *
+ * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
+ */
+static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
+ unsigned long ino)
+{
+ int level, flags, nchildren;
+ int ret = 0;
+
+ level = nilfs_btree_node_get_level(node);
+ flags = nilfs_btree_node_get_flags(node);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+
+ if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
+ level > NILFS_BTREE_LEVEL_MAX ||
+ nchildren < 0 ||
+ nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
+ pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
+ ino, level, flags, nchildren);
+ ret = 1;
+ }
+ return ret;
+}
+
int nilfs_btree_broken_node_block(struct buffer_head *bh)
{
int ret;
@@ -603,6 +633,44 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
return 0;
}
+/**
+ * nilfs_btree_get_next_key - get next valid key from btree path array
+ * @btree: bmap struct of btree
+ * @path: array of nilfs_btree_path struct
+ * @minlevel: start level
+ * @nextkey: place to store the next valid key
+ *
+ * Return Value: If a next key was found, 0 is returned. Otherwise,
+ * -ENOENT is returned.
+ */
+static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
+ const struct nilfs_btree_path *path,
+ int minlevel, __u64 *nextkey)
+{
+ struct nilfs_btree_node *node;
+ int maxlevel = nilfs_btree_height(btree) - 1;
+ int index, next_adj, level;
+
+ /* Next index is already set to bp_index for leaf nodes. */
+ next_adj = 0;
+ for (level = minlevel; level <= maxlevel; level++) {
+ if (level == maxlevel)
+ node = nilfs_btree_get_root(btree);
+ else
+ node = nilfs_btree_get_nonroot_node(path, level);
+
+ index = path[level].bp_index + next_adj;
+ if (index < nilfs_btree_node_get_nchildren(node)) {
+ /* Next key is in this node */
+ *nextkey = nilfs_btree_node_get_key(node, index);
+ return 0;
+ }
+ /* For non-leaf nodes, next index is stored at bp_index + 1. */
+ next_adj = 1;
+ }
+ return -ENOENT;
+}
+
static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
__u64 key, int level, __u64 *ptrp)
{
@@ -1533,6 +1601,27 @@ out:
return ret;
}
+static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
+ __u64 *keyp)
+{
+ struct nilfs_btree_path *path;
+ const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
+ int ret;
+
+ path = nilfs_btree_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
+ if (!ret)
+ *keyp = start;
+ else if (ret == -ENOENT)
+ ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
+
+ nilfs_btree_free_path(path);
+ return ret;
+}
+
static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
{
struct nilfs_btree_path *path;
@@ -1713,7 +1802,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
/* convert and insert */
dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
- nilfs_btree_init(btree);
+ __nilfs_btree_init(btree);
if (nreq != NULL) {
nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
@@ -2268,7 +2357,9 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = {
.bop_assign = nilfs_btree_assign,
.bop_mark = nilfs_btree_mark,
+ .bop_seek_key = nilfs_btree_seek_key,
.bop_last_key = nilfs_btree_last_key,
+
.bop_check_insert = NULL,
.bop_check_delete = nilfs_btree_check_delete,
.bop_gather_data = nilfs_btree_gather_data,
@@ -2288,18 +2379,31 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
.bop_assign = nilfs_btree_assign_gc,
.bop_mark = NULL,
+ .bop_seek_key = NULL,
.bop_last_key = NULL,
+
.bop_check_insert = NULL,
.bop_check_delete = NULL,
.bop_gather_data = NULL,
};
-int nilfs_btree_init(struct nilfs_bmap *bmap)
+static void __nilfs_btree_init(struct nilfs_bmap *bmap)
{
bmap->b_ops = &nilfs_btree_ops;
bmap->b_nchildren_per_block =
NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
- return 0;
+}
+
+int nilfs_btree_init(struct nilfs_bmap *bmap)
+{
+ int ret = 0;
+
+ __nilfs_btree_init(bmap);
+
+ if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
+ bmap->b_inode->i_ino))
+ ret = -EIO;
+ return ret;
}
void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 0d58075f34e2..b6596cab9e99 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -53,6 +53,13 @@ nilfs_cpfile_get_offset(const struct inode *cpfile, __u64 cno)
return do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile));
}
+static __u64 nilfs_cpfile_first_checkpoint_in_block(const struct inode *cpfile,
+ unsigned long blkoff)
+{
+ return (__u64)nilfs_cpfile_checkpoints_per_block(cpfile) * blkoff
+ + 1 - NILFS_MDT(cpfile)->mi_first_entry_offset;
+}
+
static unsigned long
nilfs_cpfile_checkpoints_in_block(const struct inode *cpfile,
__u64 curr,
@@ -146,6 +153,44 @@ static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile,
create, nilfs_cpfile_block_init, bhp);
}
+/**
+ * nilfs_cpfile_find_checkpoint_block - find and get a buffer on cpfile
+ * @cpfile: inode of cpfile
+ * @start_cno: start checkpoint number (inclusive)
+ * @end_cno: end checkpoint number (inclusive)
+ * @cnop: place to store the next checkpoint number
+ * @bhp: place to store a pointer to buffer_head struct
+ *
+ * Return Value: On success, it returns 0. On error, the following negative
+ * error code is returned.
+ *
+ * %-ENOMEM - Insufficient memory available.
+ *
+ * %-EIO - I/O error
+ *
+ * %-ENOENT - no block exists in the range.
+ */
+static int nilfs_cpfile_find_checkpoint_block(struct inode *cpfile,
+ __u64 start_cno, __u64 end_cno,
+ __u64 *cnop,
+ struct buffer_head **bhp)
+{
+ unsigned long start, end, blkoff;
+ int ret;
+
+ if (unlikely(start_cno > end_cno))
+ return -ENOENT;
+
+ start = nilfs_cpfile_get_blkoff(cpfile, start_cno);
+ end = nilfs_cpfile_get_blkoff(cpfile, end_cno);
+
+ ret = nilfs_mdt_find_block(cpfile, start, end, &blkoff, bhp);
+ if (!ret)
+ *cnop = (blkoff == start) ? start_cno :
+ nilfs_cpfile_first_checkpoint_in_block(cpfile, blkoff);
+ return ret;
+}
+
static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile,
__u64 cno)
{
@@ -403,14 +448,15 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
return -ENOENT; /* checkpoint number 0 is invalid */
down_read(&NILFS_MDT(cpfile)->mi_sem);
- for (n = 0; cno < cur_cno && n < nci; cno += ncps) {
- ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
- ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
+ for (n = 0; n < nci; cno += ncps) {
+ ret = nilfs_cpfile_find_checkpoint_block(
+ cpfile, cno, cur_cno - 1, &cno, &bh);
if (ret < 0) {
- if (ret != -ENOENT)
- goto out;
- continue; /* skip hole */
+ if (likely(ret == -ENOENT))
+ break;
+ goto out;
}
+ ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno);
kaddr = kmap_atomic(bh->b_page);
cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 82f4865e86dd..ebf89fd8ac1a 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -173,6 +173,21 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
return ret;
}
+static int nilfs_direct_seek_key(const struct nilfs_bmap *direct, __u64 start,
+ __u64 *keyp)
+{
+ __u64 key;
+
+ for (key = start; key <= NILFS_DIRECT_KEY_MAX; key++) {
+ if (nilfs_direct_get_ptr(direct, key) !=
+ NILFS_BMAP_INVALID_PTR) {
+ *keyp = key;
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp)
{
__u64 key, lastkey;
@@ -355,7 +370,9 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = {
.bop_assign = nilfs_direct_assign,
.bop_mark = NULL,
+ .bop_seek_key = nilfs_direct_seek_key,
.bop_last_key = nilfs_direct_last_key,
+
.bop_check_insert = nilfs_direct_check_insert,
.bop_check_delete = NULL,
.bop_gather_data = nilfs_direct_gather_data,
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index a8c728acb7a8..54575e3cc1a2 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -143,8 +143,6 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
*/
const struct file_operations nilfs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.unlocked_ioctl = nilfs_ioctl,
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8b5969538f39..be936df4ba73 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -26,7 +26,7 @@
#include <linux/mpage.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
@@ -106,7 +106,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
err = nilfs_transaction_begin(inode->i_sb, &ti, 1);
if (unlikely(err))
goto out;
- err = nilfs_bmap_insert(ii->i_bmap, (unsigned long)blkoff,
+ err = nilfs_bmap_insert(ii->i_bmap, blkoff,
(unsigned long)bh_result);
if (unlikely(err != 0)) {
if (err == -EEXIST) {
@@ -305,8 +305,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
}
static ssize_t
-nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
+nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -314,18 +313,17 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
size_t count = iov_iter_count(iter);
ssize_t size;
- if (rw == WRITE)
+ if (iov_iter_rw(iter) == WRITE)
return 0;
/* Needs synchronization with the cleaner */
- size = blockdev_direct_IO(rw, iocb, inode, iter, offset,
- nilfs_get_block);
+ size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
*/
- if (unlikely((rw & WRITE) && size < 0)) {
+ if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) {
loff_t isize = i_size_read(inode);
loff_t end = offset + count;
@@ -443,21 +441,20 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
void nilfs_set_inode_flags(struct inode *inode)
{
unsigned int flags = NILFS_I(inode)->i_flags;
+ unsigned int new_fl = 0;
- inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
- S_DIRSYNC);
if (flags & FS_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & FS_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & FS_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & FS_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
- mapping_set_gfp_mask(inode->i_mapping,
- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ new_fl |= S_DIRSYNC;
+ inode_set_flags(inode, new_fl, S_SYNC | S_APPEND | S_IMMUTABLE |
+ S_NOATIME | S_DIRSYNC);
}
int nilfs_read_inode_common(struct inode *inode,
@@ -542,6 +539,8 @@ static int __nilfs_read_inode(struct super_block *sb,
brelse(bh);
up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
nilfs_set_inode_flags(inode);
+ mapping_set_gfp_mask(inode->i_mapping,
+ mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
return 0;
failed_unmap:
@@ -714,7 +713,7 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags)
static void nilfs_truncate_bmap(struct nilfs_inode_info *ii,
unsigned long from)
{
- unsigned long b;
+ __u64 b;
int ret;
if (!test_bit(NILFS_I_BMAP, &ii->i_state))
@@ -729,7 +728,7 @@ repeat:
if (b < from)
return;
- b -= min_t(unsigned long, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
+ b -= min_t(__u64, NILFS_MAX_TRUNCATE_BLOCKS, b - from);
ret = nilfs_bmap_truncate(ii->i_bmap, b);
nilfs_relax_pressure_in_lock(ii->vfs_inode.i_sb);
if (!ret || (ret == -ENOMEM &&
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 892cf5ffdb8e..dee34d990281 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -261,6 +261,60 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create,
}
/**
+ * nilfs_mdt_find_block - find and get a buffer on meta data file.
+ * @inode: inode of the meta data file
+ * @start: start block offset (inclusive)
+ * @end: end block offset (inclusive)
+ * @blkoff: block offset
+ * @out_bh: place to store a pointer to buffer_head struct
+ *
+ * nilfs_mdt_find_block() looks up an existing block in range of
+ * [@start, @end] and stores pointer to a buffer head of the block to
+ * @out_bh, and block offset to @blkoff, respectively. @out_bh and
+ * @blkoff are substituted only when zero is returned.
+ *
+ * Return Value: On success, it returns 0. On error, the following negative
+ * error code is returned.
+ *
+ * %-ENOMEM - Insufficient memory available.
+ *
+ * %-EIO - I/O error
+ *
+ * %-ENOENT - no block was found in the range
+ */
+int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
+ unsigned long end, unsigned long *blkoff,
+ struct buffer_head **out_bh)
+{
+ __u64 next;
+ int ret;
+
+ if (unlikely(start > end))
+ return -ENOENT;
+
+ ret = nilfs_mdt_read_block(inode, start, true, out_bh);
+ if (!ret) {
+ *blkoff = start;
+ goto out;
+ }
+ if (unlikely(ret != -ENOENT || start == ULONG_MAX))
+ goto out;
+
+ ret = nilfs_bmap_seek_key(NILFS_I(inode)->i_bmap, start + 1, &next);
+ if (!ret) {
+ if (next <= end) {
+ ret = nilfs_mdt_read_block(inode, next, true, out_bh);
+ if (!ret)
+ *blkoff = next;
+ } else {
+ ret = -ENOENT;
+ }
+ }
+out:
+ return ret;
+}
+
+/**
* nilfs_mdt_delete_block - make a hole on the meta data file.
* @inode: inode of the meta data file
* @block: block offset
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ab172e8549c5..fe529a87a208 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -78,6 +78,9 @@ int nilfs_mdt_get_block(struct inode *, unsigned long, int,
void (*init_block)(struct inode *,
struct buffer_head *, void *),
struct buffer_head **);
+int nilfs_mdt_find_block(struct inode *inode, unsigned long start,
+ unsigned long end, unsigned long *blkoff,
+ struct buffer_head **out_bh);
int nilfs_mdt_delete_block(struct inode *, unsigned long);
int nilfs_mdt_forget_block(struct inode *, unsigned long);
int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
@@ -111,7 +114,10 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode)
return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
}
-#define nilfs_mdt_bgl_lock(inode, bg) \
- (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock)
+static inline spinlock_t *
+nilfs_mdt_bgl_lock(struct inode *inode, unsigned int block_group)
+{
+ return bgl_lock_ptr(NILFS_MDT(inode)->mi_bgl, block_group);
+}
#endif /* _NILFS_MDT_H */
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 700ecbcca55d..45d650addd56 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -89,18 +89,16 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
void nilfs_forget_buffer(struct buffer_head *bh)
{
struct page *page = bh->b_page;
+ const unsigned long clear_bits =
+ (1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped |
+ 1 << BH_Async_Write | 1 << BH_NILFS_Volatile |
+ 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected);
lock_buffer(bh);
- clear_buffer_nilfs_volatile(bh);
- clear_buffer_nilfs_checked(bh);
- clear_buffer_nilfs_redirected(bh);
- clear_buffer_async_write(bh);
- clear_buffer_dirty(bh);
+ set_mask_bits(&bh->b_state, clear_bits, 0);
if (nilfs_page_buffers_clean(page))
__nilfs_clear_page_dirty(page);
- clear_buffer_uptodate(bh);
- clear_buffer_mapped(bh);
bh->b_blocknr = -1;
ClearPageUptodate(page);
ClearPageMappedToDisk(page);
@@ -421,6 +419,10 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
if (page_has_buffers(page)) {
struct buffer_head *bh, *head;
+ const unsigned long clear_bits =
+ (1 << BH_Uptodate | 1 << BH_Dirty | 1 << BH_Mapped |
+ 1 << BH_Async_Write | 1 << BH_NILFS_Volatile |
+ 1 << BH_NILFS_Checked | 1 << BH_NILFS_Redirected);
bh = head = page_buffers(page);
do {
@@ -430,13 +432,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
"discard block %llu, size %zu",
(u64)bh->b_blocknr, bh->b_size);
}
- clear_buffer_async_write(bh);
- clear_buffer_dirty(bh);
- clear_buffer_nilfs_volatile(bh);
- clear_buffer_nilfs_checked(bh);
- clear_buffer_nilfs_redirected(bh);
- clear_buffer_uptodate(bh);
- clear_buffer_mapped(bh);
+ set_mask_bits(&bh->b_state, clear_bits, 0);
unlock_buffer(bh);
} while (bh = bh->b_this_page, bh != head);
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 469086b9f99b..c6abbad9b8e3 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -24,6 +24,7 @@
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/bitops.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
@@ -1588,7 +1589,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
- set_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page) {
lock_page(bd_page);
@@ -1688,7 +1688,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
- clear_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1768,7 +1767,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
- clear_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1788,12 +1786,13 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
*/
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
- clear_buffer_async_write(bh);
- clear_buffer_delay(bh);
- clear_buffer_nilfs_volatile(bh);
- clear_buffer_nilfs_redirected(bh);
+ const unsigned long set_bits = (1 << BH_Uptodate);
+ const unsigned long clear_bits =
+ (1 << BH_Dirty | 1 << BH_Async_Write |
+ 1 << BH_Delay | 1 << BH_NILFS_Volatile |
+ 1 << BH_NILFS_Redirected);
+
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
@@ -1907,6 +1906,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
{
struct nilfs_inode_info *ii, *n;
+ int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
int defer_iput = false;
spin_lock(&nilfs->ns_inode_lock);
@@ -1919,10 +1919,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
brelse(ii->i_bh);
ii->i_bh = NULL;
list_del_init(&ii->i_dirty);
- if (!ii->vfs_inode.i_nlink) {
+ if (!ii->vfs_inode.i_nlink || during_mount) {
/*
- * Defer calling iput() to avoid a deadlock
- * over I_SYNC flag for inodes with i_nlink == 0
+ * Defer calling iput() to avoid deadlocks if
+ * i_nlink == 0 or mount is not yet finished.
*/
list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
defer_iput = true;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5bc2a1cf73c3..c1725f20a9d1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1020,7 +1020,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
struct dentry *dentry;
int ret;
- if (cno < 0 || cno > nilfs->ns_cno)
+ if (cno > nilfs->ns_cno)
return false;
if (cno >= nilfs_last_cno(nilfs))
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9a66ff79ff27..d2f97ecca6a5 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
!(marks_mask & FS_ISDIR & ~marks_ignored_mask))
return false;
- if (event_mask & marks_mask & ~marks_ignored_mask)
+ if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
+ ~marks_ignored_mask)
return true;
return false;
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 36ae529511c4..2ff263e6d363 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -8,7 +8,7 @@ ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
-ccflags-y := -DNTFS_VERSION=\"2.1.31\"
+ccflags-y := -DNTFS_VERSION=\"2.1.32\"
ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1da9b2d184dc..7bb487e663b4 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
/*
* file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -28,7 +28,6 @@
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>
-#include <linux/aio.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -329,62 +328,166 @@ err_out:
return err;
}
-/**
- * ntfs_fault_in_pages_readable -
- *
- * Fault a number of userspace pages into pagetables.
- *
- * Unlike include/linux/pagemap.h::fault_in_pages_readable(), this one copes
- * with more than two userspace pages as well as handling the single page case
- * elegantly.
- *
- * If you find this difficult to understand, then think of the while loop being
- * the following code, except that we do without the integer variable ret:
- *
- * do {
- * ret = __get_user(c, uaddr);
- * uaddr += PAGE_SIZE;
- * } while (!ret && uaddr < end);
- *
- * Note, the final __get_user() may well run out-of-bounds of the user buffer,
- * but _not_ out-of-bounds of the page the user buffer belongs to, and since
- * this is only a read and not a write, and since it is still in the same page,
- * it should not matter and this makes the code much simpler.
- */
-static inline void ntfs_fault_in_pages_readable(const char __user *uaddr,
- int bytes)
-{
- const char __user *end;
- volatile char c;
-
- /* Set @end to the first byte outside the last page we care about. */
- end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes);
-
- while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end))
- ;
-}
-
-/**
- * ntfs_fault_in_pages_readable_iovec -
- *
- * Same as ntfs_fault_in_pages_readable() but operates on an array of iovecs.
- */
-static inline void ntfs_fault_in_pages_readable_iovec(const struct iovec *iov,
- size_t iov_ofs, int bytes)
+static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb,
+ struct iov_iter *from)
{
- do {
- const char __user *buf;
- unsigned len;
+ loff_t pos;
+ s64 end, ll;
+ ssize_t err;
+ unsigned long flags;
+ struct file *file = iocb->ki_filp;
+ struct inode *vi = file_inode(file);
+ ntfs_inode *base_ni, *ni = NTFS_I(vi);
+ ntfs_volume *vol = ni->vol;
- buf = iov->iov_base + iov_ofs;
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- ntfs_fault_in_pages_readable(buf, len);
- bytes -= len;
- iov++;
- iov_ofs = 0;
- } while (bytes);
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+ "0x%llx, count 0x%zx.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)iocb->ki_pos,
+ iov_iter_count(from));
+ err = generic_write_checks(iocb, from);
+ if (unlikely(err <= 0))
+ goto out;
+ /*
+ * All checks have passed. Before we start doing any writing we want
+ * to abort any totally illegal writes.
+ */
+ BUG_ON(NInoMstProtected(ni));
+ BUG_ON(ni->type != AT_DATA);
+ /* If file is encrypted, deny access, just like NT4. */
+ if (NInoEncrypted(ni)) {
+ /* Only $DATA attributes can be encrypted. */
+ /*
+ * Reminder for later: Encrypted files are _always_
+ * non-resident so that the content can always be encrypted.
+ */
+ ntfs_debug("Denying write access to encrypted file.");
+ err = -EACCES;
+ goto out;
+ }
+ if (NInoCompressed(ni)) {
+ /* Only unnamed $DATA attribute can be compressed. */
+ BUG_ON(ni->name_len);
+ /*
+ * Reminder for later: If resident, the data is not actually
+ * compressed. Only on the switch to non-resident does
+ * compression kick in. This is in contrast to encrypted files
+ * (see above).
+ */
+ ntfs_error(vi->i_sb, "Writing to compressed files is not "
+ "implemented yet. Sorry.");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ base_ni = ni;
+ if (NInoAttr(ni))
+ base_ni = ni->ext.base_ntfs_ino;
+ err = file_remove_suid(file);
+ if (unlikely(err))
+ goto out;
+ /*
+ * Our ->update_time method always succeeds thus file_update_time()
+ * cannot fail either so there is no need to check the return code.
+ */
+ file_update_time(file);
+ pos = iocb->ki_pos;
+ /* The first byte after the last cluster being written to. */
+ end = (pos + iov_iter_count(from) + vol->cluster_size_mask) &
+ ~(u64)vol->cluster_size_mask;
+ /*
+ * If the write goes beyond the allocated size, extend the allocation
+ * to cover the whole of the write, rounded up to the nearest cluster.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (end > ll) {
+ /*
+ * Extend the allocation without changing the data size.
+ *
+ * Note we ensure the allocation is big enough to at least
+ * write some data but we do not require the allocation to be
+ * complete, i.e. it may be partial.
+ */
+ ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
+ if (likely(ll >= 0)) {
+ BUG_ON(pos >= ll);
+ /* If the extension was partial truncate the write. */
+ if (end > ll) {
+ ntfs_debug("Truncating write to inode 0x%lx, "
+ "attribute type 0x%x, because "
+ "the allocation was only "
+ "partially extended.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ iov_iter_truncate(from, ll - pos);
+ }
+ } else {
+ err = ll;
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ /* Perform a partial write if possible or fail. */
+ if (pos < ll) {
+ ntfs_debug("Truncating write to inode 0x%lx "
+ "attribute type 0x%x, because "
+ "extending the allocation "
+ "failed (error %d).",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type),
+ (int)-err);
+ iov_iter_truncate(from, ll - pos);
+ } else {
+ if (err != -ENOSPC)
+ ntfs_error(vi->i_sb, "Cannot perform "
+ "write to inode "
+ "0x%lx, attribute "
+ "type 0x%x, because "
+ "extending the "
+ "allocation failed "
+ "(error %ld).",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type),
+ (long)-err);
+ else
+ ntfs_debug("Cannot perform write to "
+ "inode 0x%lx, "
+ "attribute type 0x%x, "
+ "because there is not "
+ "space left.",
+ vi->i_ino, (unsigned)
+ le32_to_cpu(ni->type));
+ goto out;
+ }
+ }
+ }
+ /*
+ * If the write starts beyond the initialized size, extend it up to the
+ * beginning of the write and initialize all non-sparse space between
+ * the old initialized size and the new one. This automatically also
+ * increments the vfs inode->i_size to keep it above or equal to the
+ * initialized_size.
+ */
+ read_lock_irqsave(&ni->size_lock, flags);
+ ll = ni->initialized_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ if (pos > ll) {
+ /*
+ * Wait for ongoing direct i/o to complete before proceeding.
+ * New direct i/o cannot start as we hold i_mutex.
+ */
+ inode_dio_wait(vi);
+ err = ntfs_attr_extend_initialized(ni, pos);
+ if (unlikely(err < 0))
+ ntfs_error(vi->i_sb, "Cannot perform write to inode "
+ "0x%lx, attribute type 0x%x, because "
+ "extending the initialized size "
+ "failed (error %d).", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (int)-err);
+ }
+out:
+ return err;
}
/**
@@ -421,8 +524,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping,
goto err_out;
}
}
- err = add_to_page_cache_lru(*cached_page, mapping, index,
- GFP_KERNEL);
+ err = add_to_page_cache_lru(*cached_page, mapping,
+ index, GFP_KERNEL);
if (unlikely(err)) {
if (err == -EEXIST)
continue;
@@ -1268,180 +1371,6 @@ rl_not_mapped_enoent:
return err;
}
-/*
- * Copy as much as we can into the pages and return the number of bytes which
- * were successfully copied. If a fault is encountered then clear the pages
- * out to (ofs + bytes) and return the number of bytes which were copied.
- */
-static inline size_t ntfs_copy_from_user(struct page **pages,
- unsigned nr_pages, unsigned ofs, const char __user *buf,
- size_t bytes)
-{
- struct page **last_page = pages + nr_pages;
- char *addr;
- size_t total = 0;
- unsigned len;
- int left;
-
- do {
- len = PAGE_CACHE_SIZE - ofs;
- if (len > bytes)
- len = bytes;
- addr = kmap_atomic(*pages);
- left = __copy_from_user_inatomic(addr + ofs, buf, len);
- kunmap_atomic(addr);
- if (unlikely(left)) {
- /* Do it the slow way. */
- addr = kmap(*pages);
- left = __copy_from_user(addr + ofs, buf, len);
- kunmap(*pages);
- if (unlikely(left))
- goto err_out;
- }
- total += len;
- bytes -= len;
- if (!bytes)
- break;
- buf += len;
- ofs = 0;
- } while (++pages < last_page);
-out:
- return total;
-err_out:
- total += len - left;
- /* Zero the rest of the target like __copy_from_user(). */
- while (++pages < last_page) {
- bytes -= len;
- if (!bytes)
- break;
- len = PAGE_CACHE_SIZE;
- if (len > bytes)
- len = bytes;
- zero_user(*pages, 0, len);
- }
- goto out;
-}
-
-static size_t __ntfs_copy_from_user_iovec_inatomic(char *vaddr,
- const struct iovec *iov, size_t iov_ofs, size_t bytes)
-{
- size_t total = 0;
-
- while (1) {
- const char __user *buf = iov->iov_base + iov_ofs;
- unsigned len;
- size_t left;
-
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- left = __copy_from_user_inatomic(vaddr, buf, len);
- total += len;
- bytes -= len;
- vaddr += len;
- if (unlikely(left)) {
- total -= left;
- break;
- }
- if (!bytes)
- break;
- iov++;
- iov_ofs = 0;
- }
- return total;
-}
-
-static inline void ntfs_set_next_iovec(const struct iovec **iovp,
- size_t *iov_ofsp, size_t bytes)
-{
- const struct iovec *iov = *iovp;
- size_t iov_ofs = *iov_ofsp;
-
- while (bytes) {
- unsigned len;
-
- len = iov->iov_len - iov_ofs;
- if (len > bytes)
- len = bytes;
- bytes -= len;
- iov_ofs += len;
- if (iov->iov_len == iov_ofs) {
- iov++;
- iov_ofs = 0;
- }
- }
- *iovp = iov;
- *iov_ofsp = iov_ofs;
-}
-
-/*
- * This has the same side-effects and return value as ntfs_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s
- * single-segment behaviour.
- *
- * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when
- * atomic and when not atomic. This is ok because it calls
- * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In
- * fact, the only difference between __copy_from_user_inatomic() and
- * __copy_from_user() is that the latter calls might_sleep() and the former
- * should not zero the tail of the buffer on error. And on many architectures
- * __copy_from_user_inatomic() is just defined to __copy_from_user() so it
- * makes no difference at all on those architectures.
- */
-static inline size_t ntfs_copy_from_user_iovec(struct page **pages,
- unsigned nr_pages, unsigned ofs, const struct iovec **iov,
- size_t *iov_ofs, size_t bytes)
-{
- struct page **last_page = pages + nr_pages;
- char *addr;
- size_t copied, len, total = 0;
-
- do {
- len = PAGE_CACHE_SIZE - ofs;
- if (len > bytes)
- len = bytes;
- addr = kmap_atomic(*pages);
- copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs,
- *iov, *iov_ofs, len);
- kunmap_atomic(addr);
- if (unlikely(copied != len)) {
- /* Do it the slow way. */
- addr = kmap(*pages);
- copied = __ntfs_copy_from_user_iovec_inatomic(addr +
- ofs, *iov, *iov_ofs, len);
- if (unlikely(copied != len))
- goto err_out;
- kunmap(*pages);
- }
- total += len;
- ntfs_set_next_iovec(iov, iov_ofs, len);
- bytes -= len;
- if (!bytes)
- break;
- ofs = 0;
- } while (++pages < last_page);
-out:
- return total;
-err_out:
- BUG_ON(copied > len);
- /* Zero the rest of the target like __copy_from_user(). */
- memset(addr + ofs + copied, 0, len - copied);
- kunmap(*pages);
- total += copied;
- ntfs_set_next_iovec(iov, iov_ofs, copied);
- while (++pages < last_page) {
- bytes -= len;
- if (!bytes)
- break;
- len = PAGE_CACHE_SIZE;
- if (len > bytes)
- len = bytes;
- zero_user(*pages, 0, len);
- }
- goto out;
-}
-
static inline void ntfs_flush_dcache_pages(struct page **pages,
unsigned nr_pages)
{
@@ -1762,86 +1691,83 @@ err_out:
return err;
}
-static void ntfs_write_failed(struct address_space *mapping, loff_t to)
+/*
+ * Copy as much as we can into the pages and return the number of bytes which
+ * were successfully copied. If a fault is encountered then clear the pages
+ * out to (ofs + bytes) and return the number of bytes which were copied.
+ */
+static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
+ unsigned ofs, struct iov_iter *i, size_t bytes)
{
- struct inode *inode = mapping->host;
+ struct page **last_page = pages + nr_pages;
+ size_t total = 0;
+ struct iov_iter data = *i;
+ unsigned len, copied;
- if (to > inode->i_size) {
- truncate_pagecache(inode, inode->i_size);
- ntfs_truncate_vfs(inode);
- }
+ do {
+ len = PAGE_CACHE_SIZE - ofs;
+ if (len > bytes)
+ len = bytes;
+ copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
+ len);
+ total += copied;
+ bytes -= copied;
+ if (!bytes)
+ break;
+ iov_iter_advance(&data, copied);
+ if (copied < len)
+ goto err;
+ ofs = 0;
+ } while (++pages < last_page);
+out:
+ return total;
+err:
+ /* Zero the rest of the target like __copy_from_user(). */
+ len = PAGE_CACHE_SIZE - copied;
+ do {
+ if (len > bytes)
+ len = bytes;
+ zero_user(*pages, copied, len);
+ bytes -= len;
+ copied = 0;
+ len = PAGE_CACHE_SIZE;
+ } while (++pages < last_page);
+ goto out;
}
/**
- * ntfs_file_buffered_write -
- *
- * Locking: The vfs is holding ->i_mutex on the inode.
+ * ntfs_perform_write - perform buffered write to a file
+ * @file: file to write to
+ * @i: iov_iter with data to write
+ * @pos: byte offset in file at which to begin writing to
*/
-static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
- const struct iovec *iov, unsigned long nr_segs,
- loff_t pos, loff_t *ppos, size_t count)
+static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
+ loff_t pos)
{
- struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *vi = mapping->host;
ntfs_inode *ni = NTFS_I(vi);
ntfs_volume *vol = ni->vol;
struct page *pages[NTFS_MAX_PAGES_PER_CLUSTER];
struct page *cached_page = NULL;
- char __user *buf = NULL;
- s64 end, ll;
VCN last_vcn;
LCN lcn;
- unsigned long flags;
- size_t bytes, iov_ofs = 0; /* Offset in the current iovec. */
- ssize_t status, written;
+ size_t bytes;
+ ssize_t status, written = 0;
unsigned nr_pages;
- int err;
- ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, "
- "pos 0x%llx, count 0x%lx.",
- vi->i_ino, (unsigned)le32_to_cpu(ni->type),
- (unsigned long long)pos, (unsigned long)count);
- if (unlikely(!count))
- return 0;
- BUG_ON(NInoMstProtected(ni));
- /*
- * If the attribute is not an index root and it is encrypted or
- * compressed, we cannot write to it yet. Note we need to check for
- * AT_INDEX_ALLOCATION since this is the type of both directory and
- * index inodes.
- */
- if (ni->type != AT_INDEX_ALLOCATION) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- /*
- * Reminder for later: Encrypted files are _always_
- * non-resident so that the content can always be
- * encrypted.
- */
- ntfs_debug("Denying write access to encrypted file.");
- return -EACCES;
- }
- if (NInoCompressed(ni)) {
- /* Only unnamed $DATA attribute can be compressed. */
- BUG_ON(ni->type != AT_DATA);
- BUG_ON(ni->name_len);
- /*
- * Reminder for later: If resident, the data is not
- * actually compressed. Only on the switch to non-
- * resident does compression kick in. This is in
- * contrast to encrypted files (see above).
- */
- ntfs_error(vi->i_sb, "Writing to compressed files is "
- "not implemented yet. Sorry.");
- return -EOPNOTSUPP;
- }
- }
+ ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos "
+ "0x%llx, count 0x%lx.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type),
+ (unsigned long long)pos,
+ (unsigned long)iov_iter_count(i));
/*
* If a previous ntfs_truncate() failed, repeat it and abort if it
* fails again.
*/
if (unlikely(NInoTruncateFailed(ni))) {
+ int err;
+
inode_dio_wait(vi);
err = ntfs_truncate(vi);
if (err || NInoTruncateFailed(ni)) {
@@ -1855,81 +1781,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
return err;
}
}
- /* The first byte after the write. */
- end = pos + count;
- /*
- * If the write goes beyond the allocated size, extend the allocation
- * to cover the whole of the write, rounded up to the nearest cluster.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (end > ll) {
- /* Extend the allocation without changing the data size. */
- ll = ntfs_attr_extend_allocation(ni, end, -1, pos);
- if (likely(ll >= 0)) {
- BUG_ON(pos >= ll);
- /* If the extension was partial truncate the write. */
- if (end > ll) {
- ntfs_debug("Truncating write to inode 0x%lx, "
- "attribute type 0x%x, because "
- "the allocation was only "
- "partially extended.",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type));
- end = ll;
- count = ll - pos;
- }
- } else {
- err = ll;
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->allocated_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- /* Perform a partial write if possible or fail. */
- if (pos < ll) {
- ntfs_debug("Truncating write to inode 0x%lx, "
- "attribute type 0x%x, because "
- "extending the allocation "
- "failed (error code %i).",
- vi->i_ino, (unsigned)
- le32_to_cpu(ni->type), err);
- end = ll;
- count = ll - pos;
- } else {
- ntfs_error(vol->sb, "Cannot perform write to "
- "inode 0x%lx, attribute type "
- "0x%x, because extending the "
- "allocation failed (error "
- "code %i).", vi->i_ino,
- (unsigned)
- le32_to_cpu(ni->type), err);
- return err;
- }
- }
- }
- written = 0;
- /*
- * If the write starts beyond the initialized size, extend it up to the
- * beginning of the write and initialize all non-sparse space between
- * the old initialized size and the new one. This automatically also
- * increments the vfs inode->i_size to keep it above or equal to the
- * initialized_size.
- */
- read_lock_irqsave(&ni->size_lock, flags);
- ll = ni->initialized_size;
- read_unlock_irqrestore(&ni->size_lock, flags);
- if (pos > ll) {
- err = ntfs_attr_extend_initialized(ni, pos);
- if (err < 0) {
- ntfs_error(vol->sb, "Cannot perform write to inode "
- "0x%lx, attribute type 0x%x, because "
- "extending the initialized size "
- "failed (error code %i).", vi->i_ino,
- (unsigned)le32_to_cpu(ni->type), err);
- status = err;
- goto err_out;
- }
- }
/*
* Determine the number of pages per cluster for non-resident
* attributes.
@@ -1937,10 +1788,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
nr_pages = 1;
if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
- /* Finally, perform the actual write. */
last_vcn = -1;
- if (likely(nr_segs == 1))
- buf = iov->iov_base;
do {
VCN vcn;
pgoff_t idx, start_idx;
@@ -1965,10 +1813,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
vol->cluster_size_bits, false);
up_read(&ni->runlist.lock);
if (unlikely(lcn < LCN_HOLE)) {
- status = -EIO;
if (lcn == LCN_ENOMEM)
status = -ENOMEM;
- else
+ else {
+ status = -EIO;
ntfs_error(vol->sb, "Cannot "
"perform write to "
"inode 0x%lx, "
@@ -1977,6 +1825,7 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
"is corrupt.",
vi->i_ino, (unsigned)
le32_to_cpu(ni->type));
+ }
break;
}
if (lcn == LCN_HOLE) {
@@ -1989,8 +1838,9 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
}
}
}
- if (bytes > count)
- bytes = count;
+ if (bytes > iov_iter_count(i))
+ bytes = iov_iter_count(i);
+again:
/*
* Bring in the user page(s) that we will copy from _first_.
* Otherwise there is a nasty deadlock on copying from the same
@@ -1999,10 +1849,10 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
* pages being swapped out between us bringing them into memory
* and doing the actual copying.
*/
- if (likely(nr_segs == 1))
- ntfs_fault_in_pages_readable(buf, bytes);
- else
- ntfs_fault_in_pages_readable_iovec(iov, iov_ofs, bytes);
+ if (unlikely(iov_iter_fault_in_multipages_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
/* Get and lock @do_pages starting at index @start_idx. */
status = __ntfs_grab_cache_pages(mapping, start_idx, do_pages,
pages, &cached_page);
@@ -2018,56 +1868,57 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
status = ntfs_prepare_pages_for_non_resident_write(
pages, do_pages, pos, bytes);
if (unlikely(status)) {
- loff_t i_size;
-
do {
unlock_page(pages[--do_pages]);
page_cache_release(pages[do_pages]);
} while (do_pages);
- /*
- * The write preparation may have instantiated
- * allocated space outside i_size. Trim this
- * off again. We can ignore any errors in this
- * case as we will just be waisting a bit of
- * allocated space, which is not a disaster.
- */
- i_size = i_size_read(vi);
- if (pos + bytes > i_size) {
- ntfs_write_failed(mapping, pos + bytes);
- }
break;
}
}
u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
- if (likely(nr_segs == 1)) {
- copied = ntfs_copy_from_user(pages + u, do_pages - u,
- ofs, buf, bytes);
- buf += copied;
- } else
- copied = ntfs_copy_from_user_iovec(pages + u,
- do_pages - u, ofs, &iov, &iov_ofs,
- bytes);
+ copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
+ i, bytes);
ntfs_flush_dcache_pages(pages + u, do_pages - u);
- status = ntfs_commit_pages_after_write(pages, do_pages, pos,
- bytes);
- if (likely(!status)) {
- written += copied;
- count -= copied;
- pos += copied;
- if (unlikely(copied != bytes))
- status = -EFAULT;
+ status = 0;
+ if (likely(copied == bytes)) {
+ status = ntfs_commit_pages_after_write(pages, do_pages,
+ pos, bytes);
+ if (!status)
+ status = bytes;
}
do {
unlock_page(pages[--do_pages]);
page_cache_release(pages[do_pages]);
} while (do_pages);
- if (unlikely(status))
+ if (unlikely(status < 0))
break;
- balance_dirty_pages_ratelimited(mapping);
+ copied = status;
cond_resched();
- } while (count);
-err_out:
- *ppos = pos;
+ if (unlikely(!copied)) {
+ size_t sc;
+
+ /*
+ * We failed to copy anything. Fall back to single
+ * segment length write.
+ *
+ * This is needed to avoid possible livelock in the
+ * case that all segments in the iov cannot be copied
+ * at once without a pagefault.
+ */
+ sc = iov_iter_single_seg_count(i);
+ if (bytes > sc)
+ bytes = sc;
+ goto again;
+ }
+ iov_iter_advance(i, copied);
+ pos += copied;
+ written += copied;
+ balance_dirty_pages_ratelimited(mapping);
+ if (fatal_signal_pending(current)) {
+ status = -EINTR;
+ break;
+ }
+ } while (iov_iter_count(i));
if (cached_page)
page_cache_release(cached_page);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
@@ -2077,63 +1928,36 @@ err_out:
}
/**
- * ntfs_file_aio_write_nolock -
+ * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock()
+ * @iocb: IO state structure
+ * @from: iov_iter with data to write
+ *
+ * Basically the same as generic_file_write_iter() except that it ends up
+ * up calling ntfs_perform_write() instead of generic_perform_write() and that
+ * O_DIRECT is not implemented.
*/
-static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
- const struct iovec *iov, unsigned long nr_segs, loff_t *ppos)
+static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- loff_t pos;
- size_t count; /* after file limit checks */
- ssize_t written, err;
+ struct inode *vi = file_inode(file);
+ ssize_t written = 0;
+ ssize_t err;
- count = iov_length(iov, nr_segs);
- pos = *ppos;
+ mutex_lock(&vi->i_mutex);
/* We can write back this queue in page reclaim. */
- current->backing_dev_info = inode_to_bdi(inode);
- written = 0;
- err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
- if (err)
- goto out;
- if (!count)
- goto out;
- err = file_remove_suid(file);
- if (err)
- goto out;
- err = file_update_time(file);
- if (err)
- goto out;
- written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
- count);
-out:
+ current->backing_dev_info = inode_to_bdi(vi);
+ err = ntfs_prepare_file_for_write(iocb, from);
+ if (iov_iter_count(from) && !err)
+ written = ntfs_perform_write(file, from, iocb->ki_pos);
current->backing_dev_info = NULL;
- return written ? written : err;
-}
-
-/**
- * ntfs_file_aio_write -
- */
-static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
-{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- ssize_t ret;
-
- BUG_ON(iocb->ki_pos != pos);
-
- mutex_lock(&inode->i_mutex);
- ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
- mutex_unlock(&inode->i_mutex);
- if (ret > 0) {
- int err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+ mutex_unlock(&vi->i_mutex);
+ if (likely(written > 0)) {
+ err = generic_write_sync(file, iocb->ki_pos, written);
if (err < 0)
- ret = err;
+ written = 0;
}
- return ret;
+ iocb->ki_pos += written;
+ return written ? written : err;
}
/**
@@ -2197,37 +2021,15 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
#endif /* NTFS_RW */
const struct file_operations ntfs_file_ops = {
- .llseek = generic_file_llseek, /* Seek inside file. */
- .read = new_sync_read, /* Read from file. */
- .read_iter = generic_file_read_iter, /* Async read from file. */
+ .llseek = generic_file_llseek,
+ .read_iter = generic_file_read_iter,
#ifdef NTFS_RW
- .write = do_sync_write, /* Write to file. */
- .aio_write = ntfs_file_aio_write, /* Async write to file. */
- /*.release = ,*/ /* Last file is closed. See
- fs/ext2/file.c::
- ext2_release_file() for
- how to use this to discard
- preallocated space for
- write opened files. */
- .fsync = ntfs_file_fsync, /* Sync a file to disk. */
- /*.aio_fsync = ,*/ /* Sync all outstanding async
- i/o operations on a
- kiocb. */
+ .write_iter = ntfs_file_write_iter,
+ .fsync = ntfs_file_fsync,
#endif /* NTFS_RW */
- /*.ioctl = ,*/ /* Perform function on the
- mounted filesystem. */
- .mmap = generic_file_mmap, /* Mmap file. */
- .open = ntfs_file_open, /* Open file. */
- .splice_read = generic_file_splice_read /* Zero-copy data send with
- the data source being on
- the ntfs partition. We do
- not need to care about the
- data destination. */
- /*.sendpage = ,*/ /* Zero-copy data send with
- the data destination being
- on the ntfs partition. We
- do not need to care about
- the data source. */
+ .mmap = generic_file_mmap,
+ .open = ntfs_file_open,
+ .splice_read = generic_file_splice_read,
};
const struct inode_operations ntfs_file_inode_ops = {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 898b9949d363..1d0c21df0d80 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,7 +28,6 @@
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/log2.h>
-#include <linux/aio.h>
#include "aops.h"
#include "attrib.h"
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 044158bd22be..2d7f76e52c37 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3370,7 +3370,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
ret = ocfs2_get_right_path(et, left_path, &right_path);
if (ret) {
mlog_errno(ret);
- goto out;
+ return ret;
}
right_el = path_leaf_el(right_path);
@@ -3453,8 +3453,7 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
subtree_index);
}
out:
- if (right_path)
- ocfs2_free_path(right_path);
+ ocfs2_free_path(right_path);
return ret;
}
@@ -3536,7 +3535,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
ret = ocfs2_get_left_path(et, right_path, &left_path);
if (ret) {
mlog_errno(ret);
- goto out;
+ return ret;
}
left_el = path_leaf_el(left_path);
@@ -3647,8 +3646,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
right_path, subtree_index);
}
out:
- if (left_path)
- ocfs2_free_path(left_path);
+ ocfs2_free_path(left_path);
return ret;
}
@@ -4334,17 +4332,17 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
} else if (path->p_tree_depth > 0) {
status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
if (status)
- goto out;
+ goto exit;
if (left_cpos != 0) {
left_path = ocfs2_new_path_from_path(path);
if (!left_path)
- goto out;
+ goto exit;
status = ocfs2_find_path(et->et_ci, left_path,
left_cpos);
if (status)
- goto out;
+ goto free_left_path;
new_el = path_leaf_el(left_path);
@@ -4361,7 +4359,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
le16_to_cpu(new_el->l_next_free_rec),
le16_to_cpu(new_el->l_count));
status = -EINVAL;
- goto out;
+ goto free_left_path;
}
rec = &new_el->l_recs[
le16_to_cpu(new_el->l_next_free_rec) - 1];
@@ -4388,18 +4386,18 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
path->p_tree_depth > 0) {
status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
if (status)
- goto out;
+ goto free_left_path;
if (right_cpos == 0)
- goto out;
+ goto free_left_path;
right_path = ocfs2_new_path_from_path(path);
if (!right_path)
- goto out;
+ goto free_left_path;
status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
if (status)
- goto out;
+ goto free_right_path;
new_el = path_leaf_el(right_path);
rec = &new_el->l_recs[0];
@@ -4413,7 +4411,7 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
(unsigned long long)le64_to_cpu(eb->h_blkno),
le16_to_cpu(new_el->l_next_free_rec));
status = -EINVAL;
- goto out;
+ goto free_right_path;
}
rec = &new_el->l_recs[1];
}
@@ -4430,12 +4428,11 @@ ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
ret = contig_type;
}
-out:
- if (left_path)
- ocfs2_free_path(left_path);
- if (right_path)
- ocfs2_free_path(right_path);
-
+free_right_path:
+ ocfs2_free_path(right_path);
+free_left_path:
+ ocfs2_free_path(left_path);
+exit:
return ret;
}
@@ -6858,13 +6855,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
if (pages == NULL) {
ret = -ENOMEM;
mlog_errno(ret);
- goto out;
+ return ret;
}
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
- goto out;
+ goto free_pages;
}
}
@@ -6996,9 +6993,8 @@ out_commit:
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
- if (pages)
- kfree(pages);
-
+free_pages:
+ kfree(pages);
return ret;
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 44db1808cdb5..f906a250da6a 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
#include <linux/mpage.h>
#include <linux/quotaops.h>
#include <linux/blkdev.h>
+#include <linux/uio.h>
#include <cluster/masklog.h>
@@ -663,6 +664,117 @@ static int ocfs2_is_overwrite(struct ocfs2_super *osb,
return 0;
}
+static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
+ struct inode *inode, loff_t offset,
+ u64 zero_len, int cluster_align)
+{
+ u32 p_cpos = 0;
+ u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+ int ret = 0;
+
+ if (offset <= i_size_read(inode) || cluster_align)
+ return 0;
+
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
+ &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ u64 s = i_size_read(inode);
+ sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
+ (do_div(s, osb->s_clustersize) >> 9);
+
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
+ zero_len >> 9, GFP_NOFS, false);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+
+ return ret;
+}
+
+static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb,
+ struct inode *inode, loff_t offset)
+{
+ u64 zero_start, zero_len, total_zero_len;
+ u32 p_cpos = 0, clusters_to_add;
+ u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
+ unsigned int num_clusters = 0;
+ unsigned int ext_flags = 0;
+ u32 size_div, offset_div;
+ int ret = 0;
+
+ {
+ u64 o = offset;
+ u64 s = i_size_read(inode);
+
+ offset_div = do_div(o, osb->s_clustersize);
+ size_div = do_div(s, osb->s_clustersize);
+ }
+
+ if (offset <= i_size_read(inode))
+ return 0;
+
+ clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) -
+ ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode));
+ total_zero_len = offset - i_size_read(inode);
+ if (clusters_to_add)
+ total_zero_len -= offset_div;
+
+ /* Allocate clusters to fill out holes, and this is only needed
+ * when we add more than one clusters. Otherwise the cluster will
+ * be allocated during direct IO */
+ if (clusters_to_add > 1) {
+ ret = ocfs2_extend_allocation(inode,
+ OCFS2_I(inode)->ip_clusters,
+ clusters_to_add - 1, 0);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ while (total_zero_len) {
+ ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
+ &ext_flags);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) +
+ size_div;
+ zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) -
+ size_div;
+ zero_len = min(total_zero_len, zero_len);
+
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+ ret = blkdev_issue_zeroout(osb->sb->s_bdev,
+ zero_start >> 9, zero_len >> 9,
+ GFP_NOFS, false);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ total_zero_len -= zero_len;
+ v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div);
+
+ /* Only at first iteration can be cluster not aligned.
+ * So set size_div to 0 for the rest */
+ size_div = 0;
+ }
+
+out:
+ return ret;
+}
+
static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
struct iov_iter *iter,
loff_t offset)
@@ -677,8 +789,8 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
struct buffer_head *di_bh = NULL;
size_t count = iter->count;
journal_t *journal = osb->journal->j_journal;
- u32 zero_len;
- int cluster_align;
+ u64 zero_len_head, zero_len_tail;
+ int cluster_align_head, cluster_align_tail;
loff_t final_size = offset + count;
int append_write = offset >= i_size_read(inode) ? 1 : 0;
unsigned int num_clusters = 0;
@@ -686,9 +798,16 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
{
u64 o = offset;
+ u64 s = i_size_read(inode);
+
+ zero_len_head = do_div(o, 1 << osb->s_clustersize_bits);
+ cluster_align_head = !zero_len_head;
- zero_len = do_div(o, 1 << osb->s_clustersize_bits);
- cluster_align = !zero_len;
+ zero_len_tail = osb->s_clustersize -
+ do_div(s, osb->s_clustersize);
+ if ((offset - i_size_read(inode)) < zero_len_tail)
+ zero_len_tail = offset - i_size_read(inode);
+ cluster_align_tail = !zero_len_tail;
}
/*
@@ -706,21 +825,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
}
if (append_write) {
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ ret = ocfs2_inode_lock(inode, NULL, 1);
if (ret < 0) {
mlog_errno(ret);
goto clean_orphan;
}
+ /* zeroing out the previously allocated cluster tail
+ * that but not zeroed */
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
- ret = ocfs2_zero_extend(inode, di_bh, offset);
+ ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
+ zero_len_tail, cluster_align_tail);
else
- ret = ocfs2_extend_no_holes(inode, di_bh, offset,
+ ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
offset);
if (ret < 0) {
mlog_errno(ret);
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
goto clean_orphan;
}
@@ -728,19 +849,15 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
if (is_overwrite < 0) {
mlog_errno(is_overwrite);
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
goto clean_orphan;
}
ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
- di_bh = NULL;
}
- written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
- iter, offset,
- ocfs2_direct_IO_get_blocks,
- ocfs2_dio_end_io, NULL, 0);
+ written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+ offset, ocfs2_direct_IO_get_blocks,
+ ocfs2_dio_end_io, NULL, 0);
if (unlikely(written < 0)) {
loff_t i_size = i_size_read(inode);
@@ -771,15 +888,23 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
if (ret < 0)
mlog_errno(ret);
}
- } else if (written < 0 && append_write && !is_overwrite &&
- !cluster_align) {
+ } else if (written > 0 && append_write && !is_overwrite &&
+ !cluster_align_head) {
+ /* zeroing out the allocated cluster head */
u32 p_cpos = 0;
u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
+ ret = ocfs2_inode_lock(inode, NULL, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto clean_orphan;
+ }
+
ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
&num_clusters, &ext_flags);
if (ret < 0) {
mlog_errno(ret);
+ ocfs2_inode_unlock(inode, 0);
goto clean_orphan;
}
@@ -787,9 +912,11 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
ret = blkdev_issue_zeroout(osb->sb->s_bdev,
p_cpos << (osb->s_clustersize_bits - 9),
- zero_len >> 9, GFP_KERNEL, false);
+ zero_len_head >> 9, GFP_NOFS, false);
if (ret < 0)
mlog_errno(ret);
+
+ ocfs2_inode_unlock(inode, 0);
}
clean_orphan:
@@ -818,9 +945,7 @@ out:
return ret;
}
-static ssize_t ocfs2_direct_IO(int rw,
- struct kiocb *iocb,
- struct iov_iter *iter,
+static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
loff_t offset)
{
struct file *file = iocb->ki_filp;
@@ -842,12 +967,11 @@ static ssize_t ocfs2_direct_IO(int rw,
if (i_size_read(inode) <= offset && !full_coherency)
return 0;
- if (rw == READ)
- return __blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev,
- iter, offset,
- ocfs2_direct_IO_get_blocks,
- ocfs2_dio_end_io, NULL, 0);
+ if (iov_iter_rw(iter) == READ)
+ return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ iter, offset,
+ ocfs2_direct_IO_get_blocks,
+ ocfs2_dio_end_io, NULL, 0);
else
return ocfs2_direct_IO_write(iocb, iter, offset);
}
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 6cae155d54df..dd59599b022d 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,7 +22,7 @@
#ifndef OCFS2_AOPS_H
#define OCFS2_AOPS_H
-#include <linux/aio.h>
+#include <linux/fs.h>
handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
struct page *page,
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 2260fb9e6508..7fdc25a4d8c0 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -196,13 +196,14 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
} \
} while (0)
-#define mlog_errno(st) do { \
+#define mlog_errno(st) ({ \
int _st = (st); \
if (_st != -ERESTARTSYS && _st != -EINTR && \
_st != AOP_TRUNCATED_PAGE && _st != -ENOSPC && \
_st != -EDQUOT) \
mlog(ML_ERROR, "status = %lld\n", (long long)_st); \
-} while (0)
+ _st; \
+})
#define mlog_bug_on_msg(cond, fmt, args...) do { \
if (cond) { \
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b08050bd3f2e..ccd4dcfc3645 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -18,7 +18,7 @@
*
* linux/fs/minix/dir.c
*
- * Copyright (C) 1991, 1992 Linux Torvalds
+ * Copyright (C) 1991, 1992 Linus Torvalds
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
@@ -2047,22 +2047,19 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
const char *name,
int namelen)
{
- int ret;
+ int ret = 0;
struct ocfs2_dir_lookup_result lookup = { NULL, };
trace_ocfs2_check_dir_for_entry(
(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
- ret = -EEXIST;
- if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0)
- goto bail;
+ if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
+ ret = -EEXIST;
+ mlog_errno(ret);
+ }
- ret = 0;
-bail:
ocfs2_free_dir_lookup_result(&lookup);
- if (ret)
- mlog_errno(ret);
return ret;
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 11849a44dc5a..8b23aa2f52dd 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1391,6 +1391,11 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
int noqueue_attempted = 0;
int dlm_locked = 0;
+ if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
+ mlog_errno(-EINVAL);
+ return -EINVAL;
+ }
+
ocfs2_init_mask_waiter(&mw);
if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 29651167190d..540dc4bdd042 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -82,7 +82,6 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
}
status = ocfs2_test_inode_bit(osb, blkno, &set);
- trace_ocfs2_get_dentry_test_bit(status, set);
if (status < 0) {
if (status == -EINVAL) {
/*
@@ -96,6 +95,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
goto unlock_nfs_sync;
}
+ trace_ocfs2_get_dentry_test_bit(status, set);
/* If the inode allocator bit is clear, this inode must be stale */
if (!set) {
status = -ESTALE;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 46e0d4e857c7..913fc250d85a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2106,7 +2106,7 @@ out:
}
static int ocfs2_prepare_inode_for_write(struct file *file,
- loff_t *ppos,
+ loff_t pos,
size_t count,
int appending,
int *direct_io,
@@ -2115,7 +2115,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
int ret = 0, meta_level = 0;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
- loff_t saved_pos = 0, end;
+ loff_t end;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int full_coherency = !(osb->s_mount_opt &
OCFS2_MOUNT_COHERENCY_BUFFERED);
@@ -2155,23 +2155,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
}
}
- /* work on a copy of ppos until we're sure that we won't have
- * to recalculate it due to relocking. */
- if (appending)
- saved_pos = i_size_read(inode);
- else
- saved_pos = *ppos;
-
- end = saved_pos + count;
+ end = pos + count;
- ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
+ ret = ocfs2_check_range_for_refcount(inode, pos, count);
if (ret == 1) {
ocfs2_inode_unlock(inode, meta_level);
meta_level = -1;
ret = ocfs2_prepare_inode_for_refcount(inode,
file,
- saved_pos,
+ pos,
count,
&meta_level);
if (has_refcount)
@@ -2227,7 +2220,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
* caller will have to retake some cluster
* locks and initiate the io as buffered.
*/
- ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
+ ret = ocfs2_check_range_for_holes(inode, pos, count);
if (ret == 1) {
/*
* Fallback to old way if the feature bit is not set.
@@ -2242,12 +2235,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
break;
}
- if (appending)
- *ppos = saved_pos;
-
out_unlock:
trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
- saved_pos, appending, count,
+ pos, appending, count,
direct_io, has_refcount);
if (meta_level >= 0)
@@ -2260,19 +2250,20 @@ out:
static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
struct iov_iter *from)
{
- int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
+ int direct_io, appending, rw_level, have_alloc_sem = 0;
int can_do_direct, has_refcount = 0;
ssize_t written = 0;
- size_t count = iov_iter_count(from);
- loff_t old_size, *ppos = &iocb->ki_pos;
+ ssize_t ret;
+ size_t count = iov_iter_count(from), orig_count;
+ loff_t old_size;
u32 old_clusters;
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct address_space *mapping = file->f_mapping;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int full_coherency = !(osb->s_mount_opt &
OCFS2_MOUNT_COHERENCY_BUFFERED);
int unaligned_dio = 0;
+ int dropped_dio = 0;
trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2280,11 +2271,11 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
file->f_path.dentry->d_name.name,
(unsigned int)from->nr_segs); /* GRRRRR */
- if (iocb->ki_nbytes == 0)
+ if (count == 0)
return 0;
- appending = file->f_flags & O_APPEND ? 1 : 0;
- direct_io = file->f_flags & O_DIRECT ? 1 : 0;
+ appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
+ direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
mutex_lock(&inode->i_mutex);
@@ -2329,9 +2320,17 @@ relock:
ocfs2_inode_unlock(inode, 1);
}
+ orig_count = iov_iter_count(from);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0) {
+ if (ret)
+ mlog_errno(ret);
+ goto out;
+ }
+ count = ret;
+
can_do_direct = direct_io;
- ret = ocfs2_prepare_inode_for_write(file, ppos,
- iocb->ki_nbytes, appending,
+ ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending,
&can_do_direct, &has_refcount);
if (ret < 0) {
mlog_errno(ret);
@@ -2339,8 +2338,7 @@ relock:
}
if (direct_io && !is_sync_kiocb(iocb))
- unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes,
- *ppos);
+ unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos);
/*
* We can't complete the direct I/O as requested, fall back to
@@ -2353,6 +2351,9 @@ relock:
rw_level = -1;
direct_io = 0;
+ iocb->ki_flags &= ~IOCB_DIRECT;
+ iov_iter_reexpand(from, orig_count);
+ dropped_dio = 1;
goto relock;
}
@@ -2376,74 +2377,18 @@ relock:
/* communicate with ocfs2_dio_end_io */
ocfs2_iocb_set_rw_locked(iocb, rw_level);
- ret = generic_write_checks(file, ppos, &count,
- S_ISBLK(inode->i_mode));
- if (ret)
- goto out_dio;
-
- iov_iter_truncate(from, count);
- if (direct_io) {
- loff_t endbyte;
- ssize_t written_buffered;
- written = generic_file_direct_write(iocb, from, *ppos);
- if (written < 0 || written == count) {
- ret = written;
- goto out_dio;
- }
-
- /*
- * for completing the rest of the request.
- */
- *ppos += written;
- count -= written;
- written_buffered = generic_perform_write(file, from, *ppos);
- /*
- * If generic_file_buffered_write() returned a synchronous error
- * then we want to return the number of bytes which were
- * direct-written, or the error code if that was zero. Note
- * that this differs from normal direct-io semantics, which
- * will return -EFOO even if some bytes were written.
- */
- if (written_buffered < 0) {
- ret = written_buffered;
- goto out_dio;
- }
-
- iocb->ki_pos = *ppos + written_buffered;
- /* We need to ensure that the page cache pages are written to
- * disk and invalidated to preserve the expected O_DIRECT
- * semantics.
- */
- endbyte = *ppos + written_buffered - 1;
- ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
- endbyte);
- if (ret == 0) {
- written += written_buffered;
- invalidate_mapping_pages(mapping,
- *ppos >> PAGE_CACHE_SHIFT,
- endbyte >> PAGE_CACHE_SHIFT);
- } else {
- /*
- * We don't know how much we wrote, so just return
- * the number of bytes which were direct-written
- */
- }
- } else {
- current->backing_dev_info = inode_to_bdi(inode);
- written = generic_perform_write(file, from, *ppos);
- if (likely(written >= 0))
- iocb->ki_pos = *ppos + written;
- current->backing_dev_info = NULL;
- }
-
-out_dio:
+ written = __generic_file_write_iter(iocb, from);
/* buffered aio wouldn't have proper lock coverage today */
- BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
+ BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+
+ if (unlikely(written <= 0))
+ goto no_sync;
- if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
- ((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
- *ppos + count - 1);
+ if (((file->f_flags & O_DSYNC) && !direct_io) ||
+ IS_SYNC(inode) || dropped_dio) {
+ ret = filemap_fdatawrite_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
if (ret < 0)
written = ret;
@@ -2454,10 +2399,12 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, *ppos,
- *ppos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
}
+no_sync:
/*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
* function pointer which is called when o_direct io completes so that
@@ -2549,7 +2496,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
* buffered reads protect themselves in ->readpage(). O_DIRECT reads
* need locks to protect pending reads from racing with truncate.
*/
- if (filp->f_flags & O_DIRECT) {
+ if (iocb->ki_flags & IOCB_DIRECT) {
have_alloc_sem = 1;
ocfs2_iocb_set_sem_locked(iocb);
@@ -2583,7 +2530,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
trace_generic_file_aio_read_ret(ret);
/* buffered aio wouldn't have proper lock coverage today */
- BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
+ BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
/* see ocfs2_file_write_iter */
if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
@@ -2678,8 +2625,6 @@ const struct inode_operations ocfs2_special_file_iops = {
*/
const struct file_operations ocfs2_fops = {
.llseek = ocfs2_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.mmap = ocfs2_mmap,
.fsync = ocfs2_sync_file,
.release = ocfs2_file_release,
@@ -2726,8 +2671,6 @@ const struct file_operations ocfs2_dops = {
*/
const struct file_operations ocfs2_fops_no_plocks = {
.llseek = ocfs2_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.mmap = ocfs2_mmap,
.fsync = ocfs2_sync_file,
.release = ocfs2_file_release,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3025c0da6b8a..be71ca0937f7 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -624,7 +624,7 @@ static int ocfs2_remove_inode(struct inode *inode,
ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
le16_to_cpu(di->i_suballoc_slot));
if (!inode_alloc_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto bail;
}
@@ -742,7 +742,7 @@ static int ocfs2_wipe_inode(struct inode *inode,
ORPHAN_DIR_SYSTEM_INODE,
orphaned_slot);
if (!orphan_dir_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 044013455621..857bbbcd39f3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -666,7 +666,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
ocfs2_local_alloc_count_bits(alloc)) {
ocfs2_error(osb->sb, "local alloc inode %llu says it has "
- "%u free bits, but a count shows %u",
+ "%u used bits, but a count shows %u",
(unsigned long long)le64_to_cpu(alloc->i_blkno),
le32_to_cpu(alloc->id1.bitmap1.i_used),
ocfs2_local_alloc_count_bits(alloc));
@@ -839,7 +839,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
u32 *numbits,
struct ocfs2_alloc_reservation *resv)
{
- int numfound, bitoff, left, startoff, lastzero;
+ int numfound = 0, bitoff, left, startoff, lastzero;
int local_resv = 0;
struct ocfs2_alloc_reservation r;
void *bitmap = NULL;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b5c3a5ea3ee6..09f90cbf0e24 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2322,10 +2322,10 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
trace_ocfs2_orphan_del(
(unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
- name, namelen);
+ name, strlen(name));
/* find it's spot in the orphan directory */
- status = ocfs2_find_entry(name, namelen, orphan_dir_inode,
+ status = ocfs2_find_entry(name, strlen(name), orphan_dir_inode,
&lookup);
if (status) {
mlog_errno(status);
@@ -2808,7 +2808,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
ORPHAN_DIR_SYSTEM_INODE,
osb->slot_num);
if (!orphan_dir_inode) {
- status = -EEXIST;
+ status = -ENOENT;
mlog_errno(status);
goto leave;
}
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 8490c64d34fe..460c6c37e683 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
{
- if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+ if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
return 1;
return 0;
}
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 20e37a3ed26f..db64ce2d4667 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -102,11 +102,11 @@
| OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
| OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
| OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \
- | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
+ | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \
+ | OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
| OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
- | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \
- | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
/*
* Heartbeat-only devices are missing journals and other files. The
@@ -179,6 +179,11 @@
#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000
/*
+ * Append Direct IO support
+ */
+#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO 0x8000
+
+/*
* backup superblock flag is used to indicate that this volume
* has backup superblocks.
*/
@@ -200,10 +205,6 @@
#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
-/*
- * Append Direct IO support
- */
-#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008
/* The byte offset of the first backup block will be 1G.
* The following will be 4G, 16G, 64G, 256G and 1T.
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ee541f92dab4..df3a500789c7 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4276,7 +4276,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
error = posix_acl_create(dir, &mode, &default_acl, &acl);
if (error) {
mlog_errno(error);
- goto out;
+ return error;
}
error = ocfs2_create_inode_in_orphan(dir, mode,
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index d5493e361a38..e78a203d44c8 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -427,7 +427,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
if (!si) {
status = -ENOMEM;
mlog_errno(status);
- goto bail;
+ return status;
}
si->si_extended = ocfs2_uses_extended_slot_map(osb);
@@ -452,7 +452,7 @@ int ocfs2_init_slot_info(struct ocfs2_super *osb)
osb->slot_info = (struct ocfs2_slot_info *)si;
bail:
- if (status < 0 && si)
+ if (status < 0)
__ocfs2_free_slot_info(si);
return status;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 1724d43d3da1..220cae7bbdbc 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -295,7 +295,7 @@ static int o2cb_cluster_check(void)
set_bit(node_num, netmap);
if (!memcmp(hbmap, netmap, sizeof(hbmap)))
return 0;
- if (i < O2CB_MAP_STABILIZE_COUNT)
+ if (i < O2CB_MAP_STABILIZE_COUNT - 1)
msleep(1000);
}
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 720aa389e0ea..2768eb1da2b8 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1004,10 +1004,8 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
BUG_ON(conn == NULL);
lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
- if (!lc) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!lc)
+ return -ENOMEM;
init_waitqueue_head(&lc->oc_wait);
init_completion(&lc->oc_sync_wait);
@@ -1063,7 +1061,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
}
out:
- if (rc && lc)
+ if (rc)
kfree(lc);
return rc;
}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 0cb889a17ae1..4479029630bb 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2499,6 +2499,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
+ ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
+ start_bit, count);
goto bail;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 26675185b886..403c5660b306 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2069,6 +2069,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
+ memcpy(sb->s_uuid, di->id2.i_super.s_uuid,
+ sizeof(di->id2.i_super.s_uuid));
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
@@ -2333,7 +2335,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
mlog_errno(status);
goto bail;
}
- cleancache_init_shared_fs((char *)&di->id2.i_super.s_uuid, sb);
+ cleancache_init_shared_fs(sb);
bail:
return status;
@@ -2563,22 +2565,22 @@ static void ocfs2_handle_error(struct super_block *sb)
ocfs2_set_ro_flag(osb, 0);
}
-static char error_buf[1024];
-
-void __ocfs2_error(struct super_block *sb,
- const char *function,
- const char *fmt, ...)
+void __ocfs2_error(struct super_block *sb, const char *function,
+ const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- vsnprintf(error_buf, sizeof(error_buf), fmt, args);
- va_end(args);
+ vaf.fmt = fmt;
+ vaf.va = &args;
/* Not using mlog here because we want to show the actual
* function the error came from. */
- printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ printk(KERN_CRIT "OCFS2: ERROR (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
+ va_end(args);
ocfs2_handle_error(sb);
}
@@ -2586,18 +2588,21 @@ void __ocfs2_error(struct super_block *sb,
/* Handle critical errors. This is intentionally more drastic than
* ocfs2_handle_error, so we only use for things like journal errors,
* etc. */
-void __ocfs2_abort(struct super_block* sb,
- const char *function,
+void __ocfs2_abort(struct super_block *sb, const char *function,
const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
va_start(args, fmt);
- vsnprintf(error_buf, sizeof(error_buf), fmt, args);
- va_end(args);
- printk(KERN_CRIT "OCFS2: abort (device %s): %s: %s\n",
- sb->s_id, function, error_buf);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk(KERN_CRIT "OCFS2: abort (device %s): %s: %pV\n",
+ sb->s_id, function, &vaf);
+
+ va_end(args);
/* We don't have the cluster support yet to go straight to
* hard readonly in here. Until then, we want to keep
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 85b190dc132f..4ca7533be479 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1238,6 +1238,10 @@ static int ocfs2_xattr_block_get(struct inode *inode,
i,
&block_off,
&name_offset);
+ if (ret) {
+ mlog_errno(ret);
+ goto cleanup;
+ }
xs->base = bucket_block(xs->bucket, block_off);
}
if (ocfs2_xattr_is_local(xs->here)) {
@@ -5665,6 +5669,10 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
i, &xv, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ break;
+ }
ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
args->ref_ci,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 902e88527fce..f993be7f2156 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -337,8 +337,6 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
const struct file_operations omfs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
diff --git a/fs/open.c b/fs/open.c
index 33f9cbf2610b..6796f04d6032 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -570,6 +570,7 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
uid = make_kuid(current_user_ns(), user);
gid = make_kgid(current_user_ns(), group);
+retry_deleg:
newattrs.ia_valid = ATTR_CTIME;
if (user != (uid_t) -1) {
if (!uid_valid(uid))
@@ -586,7 +587,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
-retry_deleg:
mutex_lock(&inode->i_mutex);
error = security_path_chown(path, uid, gid);
if (!error)
@@ -734,10 +734,10 @@ static int do_dentry_open(struct file *f,
if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
i_readcount_inc(inode);
if ((f->f_mode & FMODE_READ) &&
- likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
+ likely(f->f_op->read || f->f_op->read_iter))
f->f_mode |= FMODE_CAN_READ;
if ((f->f_mode & FMODE_WRITE) &&
- likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
+ likely(f->f_op->write || f->f_op->write_iter))
f->f_mode |= FMODE_CAN_WRITE;
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -988,9 +988,6 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
return ERR_PTR(err);
if (flags & O_CREAT)
return ERR_PTR(-EINVAL);
- if (!filename && (flags & O_DIRECTORY))
- if (!dentry->d_inode->i_op->lookup)
- return ERR_PTR(-ENOTDIR);
return do_file_open_root(dentry, mnt, filename, &op);
}
EXPORT_SYMBOL(file_open_root);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b90952f528b1..5f0d1993e6e3 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
struct ovl_fs *ufs = sb->s_fs_info;
- if (!(*flags & MS_RDONLY) &&
- (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)))
+ if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
return -EROFS;
return 0;
@@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
break;
default:
+ pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
return -EINVAL;
}
}
+
+ /* Workdir is useless in non-upper mount */
+ if (!config->upperdir && config->workdir) {
+ pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+ config->workdir);
+ kfree(config->workdir);
+ config->workdir = NULL;
+ }
+
return 0;
}
@@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_stack_depth = 0;
if (ufs->config.upperdir) {
- /* FIXME: workdir is not needed for a R/O mount */
if (!ufs->config.workdir) {
pr_err("overlayfs: missing 'workdir'\n");
goto out_free_config;
@@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto out_free_config;
+ /* Upper fs should not be r/o */
+ if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
+ pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+ err = -EINVAL;
+ goto out_put_upperpath;
+ }
+
err = ovl_mount_dir(ufs->config.workdir, &workpath);
if (err)
goto out_put_upperpath;
@@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
err = -EINVAL;
stacklen = ovl_split_lowerdirs(lowertmp);
- if (stacklen > OVL_MAX_STACK)
+ if (stacklen > OVL_MAX_STACK) {
+ pr_err("overlayfs: too many lower directries, limit is %d\n",
+ OVL_MAX_STACK);
goto out_free_lowertmp;
+ } else if (!ufs->config.upperdir && stacklen == 1) {
+ pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
+ goto out_free_lowertmp;
+ }
stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
if (!stack)
@@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ufs->numlower++;
}
- /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */
- if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))
+ /* If the upper fs is nonexistent, we mark overlayfs r/o too */
+ if (!ufs->upper_mnt)
sb->s_flags |= MS_RDONLY;
sb->s_d_op = &ovl_dentry_operations;
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e58e2a6..822da5b7cff0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,7 +21,6 @@
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
-#include <linux/aio.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -947,9 +946,7 @@ err:
const struct file_operations pipefifo_fops = {
.open = fifo_open,
.llseek = no_llseek,
- .read = new_sync_read,
.read_iter = pipe_read,
- .write = new_sync_write,
.write_iter = pipe_write,
.poll = pipe_poll,
.unlocked_ioctl = pipe_ioctl,
diff --git a/fs/pnode.c b/fs/pnode.c
index 260ac8f898a4..6367e1e435c6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -362,6 +362,46 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
}
/*
+ * Clear MNT_LOCKED when it can be shown to be safe.
+ *
+ * mount_lock lock must be held for write
+ */
+void propagate_mount_unlock(struct mount *mnt)
+{
+ struct mount *parent = mnt->mnt_parent;
+ struct mount *m, *child;
+
+ BUG_ON(parent == mnt);
+
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+ child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+ if (child)
+ child->mnt.mnt_flags &= ~MNT_LOCKED;
+ }
+}
+
+/*
+ * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
+ */
+static void mark_umount_candidates(struct mount *mnt)
+{
+ struct mount *parent = mnt->mnt_parent;
+ struct mount *m;
+
+ BUG_ON(parent == mnt);
+
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+ struct mount *child = __lookup_mnt_last(&m->mnt,
+ mnt->mnt_mountpoint);
+ if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
+ SET_MNT_MARK(child);
+ }
+ }
+}
+
+/*
* NOTE: unmounting 'mnt' naturally propagates to all other mounts its
* parent propagates to.
*/
@@ -378,13 +418,16 @@ static void __propagate_umount(struct mount *mnt)
struct mount *child = __lookup_mnt_last(&m->mnt,
mnt->mnt_mountpoint);
/*
- * umount the child only if the child has no
- * other children
+ * umount the child only if the child has no children
+ * and the child is marked safe to unmount.
*/
- if (child && list_empty(&child->mnt_mounts)) {
+ if (!child || !IS_MNT_MARKED(child))
+ continue;
+ CLEAR_MNT_MARK(child);
+ if (list_empty(&child->mnt_mounts)) {
list_del_init(&child->mnt_child);
- hlist_del_init_rcu(&child->mnt_hash);
- hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
+ child->mnt.mnt_flags |= MNT_UMOUNT;
+ list_move_tail(&child->mnt_list, &mnt->mnt_list);
}
}
}
@@ -396,11 +439,14 @@ static void __propagate_umount(struct mount *mnt)
*
* vfsmount lock must be held for write
*/
-int propagate_umount(struct hlist_head *list)
+int propagate_umount(struct list_head *list)
{
struct mount *mnt;
- hlist_for_each_entry(mnt, list, mnt_hash)
+ list_for_each_entry_reverse(mnt, list, mnt_list)
+ mark_umount_candidates(mnt);
+
+ list_for_each_entry(mnt, list, mnt_list)
__propagate_umount(mnt);
return 0;
}
diff --git a/fs/pnode.h b/fs/pnode.h
index 4a246358b031..7114ce6e6b9e 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -19,6 +19,9 @@
#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
+#define IS_MNT_LOCKED(m) ((m)->mnt.mnt_flags & MNT_LOCKED)
+#define IS_MNT_LOCKED_AND_LAZY(m) \
+ (((m)->mnt.mnt_flags & (MNT_LOCKED|MNT_SYNC_UMOUNT)) == MNT_LOCKED)
#define CL_EXPIRE 0x01
#define CL_SLAVE 0x02
@@ -40,14 +43,14 @@ static inline void set_mnt_shared(struct mount *mnt)
void change_mnt_propagation(struct mount *, int);
int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
struct hlist_head *);
-int propagate_umount(struct hlist_head *);
+int propagate_umount(struct list_head *);
int propagate_mount_busy(struct mount *, int);
+void propagate_mount_unlock(struct mount *);
void mnt_release_group_id(struct mount *);
int get_dominating_id(struct mount *mnt, const struct path *root);
unsigned int mnt_get_count(struct mount *mnt);
void mnt_set_mountpoint(struct mount *, struct mountpoint *,
struct mount *);
-void umount_tree(struct mount *, int);
struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
const struct path *root);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 1295a00ca316..fd02a9ebfc30 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -99,8 +99,8 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
buf = m->buf + m->count;
/* Ignore error for now */
- string_escape_str(tcomm, &buf, m->size - m->count,
- ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
+ buf += string_escape_str(tcomm, buf, m->size - m->count,
+ ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
m->count = buf - m->buf;
seq_putc(m, '\n');
@@ -188,6 +188,24 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
put_cred(cred);
+#ifdef CONFIG_PID_NS
+ seq_puts(m, "\nNStgid:");
+ for (g = ns->level; g <= pid->level; g++)
+ seq_printf(m, "\t%d",
+ task_tgid_nr_ns(p, pid->numbers[g].ns));
+ seq_puts(m, "\nNSpid:");
+ for (g = ns->level; g <= pid->level; g++)
+ seq_printf(m, "\t%d",
+ task_pid_nr_ns(p, pid->numbers[g].ns));
+ seq_puts(m, "\nNSpgid:");
+ for (g = ns->level; g <= pid->level; g++)
+ seq_printf(m, "\t%d",
+ task_pgrp_nr_ns(p, pid->numbers[g].ns));
+ seq_puts(m, "\nNSsid:");
+ for (g = ns->level; g <= pid->level; g++)
+ seq_printf(m, "\t%d",
+ task_session_nr_ns(p, pid->numbers[g].ns));
+#endif
seq_putc(m, '\n');
}
@@ -614,7 +632,9 @@ static int children_seq_show(struct seq_file *seq, void *v)
pid_t pid;
pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
- return seq_printf(seq, "%d ", pid);
+ seq_printf(seq, "%d ", pid);
+
+ return 0;
}
static void *children_seq_start(struct seq_file *seq, loff_t *pos)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3f3d7aeb0712..7a3b82f986dd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -238,13 +238,15 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
wchan = get_wchan(task);
- if (lookup_symbol_name(wchan, symname) < 0)
+ if (lookup_symbol_name(wchan, symname) < 0) {
if (!ptrace_may_access(task, PTRACE_MODE_READ))
return 0;
- else
- return seq_printf(m, "%lu", wchan);
- else
- return seq_printf(m, "%s", symname);
+ seq_printf(m, "%lu", wchan);
+ } else {
+ seq_printf(m, "%s", symname);
+ }
+
+ return 0;
}
#endif /* CONFIG_KALLSYMS */
@@ -309,10 +311,12 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
- return seq_printf(m, "%llu %llu %lu\n",
- (unsigned long long)task->se.sum_exec_runtime,
- (unsigned long long)task->sched_info.run_delay,
- task->sched_info.pcount);
+ seq_printf(m, "%llu %llu %lu\n",
+ (unsigned long long)task->se.sum_exec_runtime,
+ (unsigned long long)task->sched_info.run_delay,
+ task->sched_info.pcount);
+
+ return 0;
}
#endif
@@ -387,7 +391,9 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
points = oom_badness(task, NULL, NULL, totalpages) *
1000 / totalpages;
read_unlock(&tasklist_lock);
- return seq_printf(m, "%lu\n", points);
+ seq_printf(m, "%lu\n", points);
+
+ return 0;
}
struct limit_names {
@@ -432,15 +438,15 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
* print the file header
*/
seq_printf(m, "%-25s %-20s %-20s %-10s\n",
- "Limit", "Soft Limit", "Hard Limit", "Units");
+ "Limit", "Soft Limit", "Hard Limit", "Units");
for (i = 0; i < RLIM_NLIMITS; i++) {
if (rlim[i].rlim_cur == RLIM_INFINITY)
seq_printf(m, "%-25s %-20s ",
- lnames[i].name, "unlimited");
+ lnames[i].name, "unlimited");
else
seq_printf(m, "%-25s %-20lu ",
- lnames[i].name, rlim[i].rlim_cur);
+ lnames[i].name, rlim[i].rlim_cur);
if (rlim[i].rlim_max == RLIM_INFINITY)
seq_printf(m, "%-20s ", "unlimited");
@@ -462,7 +468,9 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
{
long nr;
unsigned long args[6], sp, pc;
- int res = lock_trace(task);
+ int res;
+
+ res = lock_trace(task);
if (res)
return res;
@@ -477,7 +485,8 @@ static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
args[0], args[1], args[2], args[3], args[4], args[5],
sp, pc);
unlock_trace(task);
- return res;
+
+ return 0;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
@@ -2002,12 +2011,13 @@ static int show_timer(struct seq_file *m, void *v)
notify = timer->it_sigev_notify;
seq_printf(m, "ID: %d\n", timer->it_id);
- seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo,
- timer->sigq->info.si_value.sival_ptr);
+ seq_printf(m, "signal: %d/%p\n",
+ timer->sigq->info.si_signo,
+ timer->sigq->info.si_value.sival_ptr);
seq_printf(m, "notify: %s/%s.%d\n",
- nstr[notify & ~SIGEV_THREAD_ID],
- (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
- pid_nr_ns(timer->it_pid, tp->ns));
+ nstr[notify & ~SIGEV_THREAD_ID],
+ (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
+ pid_nr_ns(timer->it_pid, tp->ns));
seq_printf(m, "ClockID: %d\n", timer->it_clock);
return 0;
@@ -2352,21 +2362,23 @@ static int do_io_accounting(struct task_struct *task, struct seq_file *m, int wh
unlock_task_sighand(task, &flags);
}
- result = seq_printf(m,
- "rchar: %llu\n"
- "wchar: %llu\n"
- "syscr: %llu\n"
- "syscw: %llu\n"
- "read_bytes: %llu\n"
- "write_bytes: %llu\n"
- "cancelled_write_bytes: %llu\n",
- (unsigned long long)acct.rchar,
- (unsigned long long)acct.wchar,
- (unsigned long long)acct.syscr,
- (unsigned long long)acct.syscw,
- (unsigned long long)acct.read_bytes,
- (unsigned long long)acct.write_bytes,
- (unsigned long long)acct.cancelled_write_bytes);
+ seq_printf(m,
+ "rchar: %llu\n"
+ "wchar: %llu\n"
+ "syscr: %llu\n"
+ "syscw: %llu\n"
+ "read_bytes: %llu\n"
+ "write_bytes: %llu\n"
+ "cancelled_write_bytes: %llu\n",
+ (unsigned long long)acct.rchar,
+ (unsigned long long)acct.wchar,
+ (unsigned long long)acct.syscr,
+ (unsigned long long)acct.syscw,
+ (unsigned long long)acct.read_bytes,
+ (unsigned long long)acct.write_bytes,
+ (unsigned long long)acct.cancelled_write_bytes);
+ result = 0;
+
out_unlock:
mutex_unlock(&task->signal->cred_guard_mutex);
return result;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 8e5ad83b629a..af84ad04df77 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -8,6 +8,7 @@
#include <linux/security.h>
#include <linux/file.h>
#include <linux/seq_file.h>
+#include <linux/fs.h>
#include <linux/proc_fs.h>
@@ -48,17 +49,23 @@ static int seq_show(struct seq_file *m, void *v)
put_files_struct(files);
}
- if (!ret) {
- seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
- (long long)file->f_pos, f_flags,
- real_mount(file->f_path.mnt)->mnt_id);
- if (file->f_op->show_fdinfo)
- file->f_op->show_fdinfo(m, file);
- ret = seq_has_overflowed(m);
- fput(file);
- }
+ if (ret)
+ return ret;
- return ret;
+ seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
+ (long long)file->f_pos, f_flags,
+ real_mount(file->f_path.mnt)->mnt_id);
+
+ show_fd_locks(m, file, files);
+ if (seq_has_overflowed(m))
+ goto out;
+
+ if (file->f_op->show_fdinfo)
+ file->f_op->show_fdinfo(m, file);
+
+out:
+ fput(file);
+ return 0;
}
static int seq_fdinfo_open(struct inode *inode, struct file *file)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 956b75d61809..6dee68d013ff 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1325,6 +1325,9 @@ out:
static int pagemap_open(struct inode *inode, struct file *file)
{
+ /* do not disclose physical addresses: attack vector */
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
"to stop being page-shift some time soon. See the "
"linux/Documentation/vm/pagemap.txt for details.\n");
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index b32ce53d24ee..56e1ffda4d89 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -364,6 +364,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
case PSTORE_TYPE_PMSG:
scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id);
break;
+ case PSTORE_TYPE_PPC_OPAL:
+ sprintf(name, "powerpc-opal-%s-%lld", psname, id);
+ break;
case PSTORE_TYPE_UNKNOWN:
scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id);
break;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 39d1373128e9..44a549beeafa 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -539,6 +539,9 @@ static int ramoops_probe(struct platform_device *pdev)
mem_address = pdata->mem_address;
record_size = pdata->record_size;
dump_oops = pdata->dump_oops;
+ ramoops_console_size = pdata->console_size;
+ ramoops_pmsg_size = pdata->pmsg_size;
+ ramoops_ftrace_size = pdata->ftrace_size;
pr_info("attached 0x%lx@0x%llx, ecc: %d/%d\n",
cxt->size, (unsigned long long)cxt->phys_addr,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 0ccd4ba3a246..ecc25cf0ee6e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -900,14 +900,17 @@ static inline struct dquot **i_dquot(struct inode *inode)
static int dqinit_needed(struct inode *inode, int type)
{
+ struct dquot * const *dquots;
int cnt;
if (IS_NOQUOTA(inode))
return 0;
+
+ dquots = i_dquot(inode);
if (type != -1)
- return !i_dquot(inode)[type];
+ return !dquots[type];
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (!i_dquot(inode)[cnt])
+ if (!dquots[cnt])
return 1;
return 0;
}
@@ -970,12 +973,13 @@ static void add_dquot_ref(struct super_block *sb, int type)
static void remove_inode_dquot_ref(struct inode *inode, int type,
struct list_head *tofree_head)
{
- struct dquot *dquot = i_dquot(inode)[type];
+ struct dquot **dquots = i_dquot(inode);
+ struct dquot *dquot = dquots[type];
- i_dquot(inode)[type] = NULL;
if (!dquot)
return;
+ dquots[type] = NULL;
if (list_empty(&dquot->dq_free)) {
/*
* The inode still has reference to dquot so it can't be in the
@@ -1159,8 +1163,8 @@ static int need_print_warning(struct dquot_warn *warn)
return uid_eq(current_fsuid(), warn->w_dq_id.uid);
case GRPQUOTA:
return in_group_p(warn->w_dq_id.gid);
- case PRJQUOTA: /* Never taken... Just make gcc happy */
- return 0;
+ case PRJQUOTA:
+ return 1;
}
return 0;
}
@@ -1389,16 +1393,21 @@ static int dquot_active(const struct inode *inode)
static void __dquot_initialize(struct inode *inode, int type)
{
int cnt, init_needed = 0;
- struct dquot *got[MAXQUOTAS];
+ struct dquot **dquots, *got[MAXQUOTAS];
struct super_block *sb = inode->i_sb;
qsize_t rsv;
if (!dquot_active(inode))
return;
+ dquots = i_dquot(inode);
+
/* First get references to structures we might need. */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
struct kqid qid;
+ kprojid_t projid;
+ int rc;
+
got[cnt] = NULL;
if (type != -1 && cnt != type)
continue;
@@ -1407,8 +1416,12 @@ static void __dquot_initialize(struct inode *inode, int type)
* we check it without locking here to avoid unnecessary
* dqget()/dqput() calls.
*/
- if (i_dquot(inode)[cnt])
+ if (dquots[cnt])
+ continue;
+
+ if (!sb_has_quota_active(sb, cnt))
continue;
+
init_needed = 1;
switch (cnt) {
@@ -1418,6 +1431,12 @@ static void __dquot_initialize(struct inode *inode, int type)
case GRPQUOTA:
qid = make_kqid_gid(inode->i_gid);
break;
+ case PRJQUOTA:
+ rc = inode->i_sb->dq_op->get_projid(inode, &projid);
+ if (rc)
+ continue;
+ qid = make_kqid_projid(projid);
+ break;
}
got[cnt] = dqget(sb, qid);
}
@@ -1438,8 +1457,8 @@ static void __dquot_initialize(struct inode *inode, int type)
/* We could race with quotaon or dqget() could have failed */
if (!got[cnt])
continue;
- if (!i_dquot(inode)[cnt]) {
- i_dquot(inode)[cnt] = got[cnt];
+ if (!dquots[cnt]) {
+ dquots[cnt] = got[cnt];
got[cnt] = NULL;
/*
* Make quota reservation system happy if someone
@@ -1447,7 +1466,7 @@ static void __dquot_initialize(struct inode *inode, int type)
*/
rsv = inode_get_rsv_space(inode);
if (unlikely(rsv))
- dquot_resv_space(i_dquot(inode)[cnt], rsv);
+ dquot_resv_space(dquots[cnt], rsv);
}
}
out_err:
@@ -1473,12 +1492,13 @@ EXPORT_SYMBOL(dquot_initialize);
static void __dquot_drop(struct inode *inode)
{
int cnt;
+ struct dquot **dquots = i_dquot(inode);
struct dquot *put[MAXQUOTAS];
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- put[cnt] = i_dquot(inode)[cnt];
- i_dquot(inode)[cnt] = NULL;
+ put[cnt] = dquots[cnt];
+ dquots[cnt] = NULL;
}
spin_unlock(&dq_data_lock);
dqput_all(put);
@@ -1486,6 +1506,7 @@ static void __dquot_drop(struct inode *inode)
void dquot_drop(struct inode *inode)
{
+ struct dquot * const *dquots;
int cnt;
if (IS_NOQUOTA(inode))
@@ -1498,8 +1519,9 @@ void dquot_drop(struct inode *inode)
* must assure that nobody can come after the DQUOT_DROP and
* add quota pointers back anyway.
*/
+ dquots = i_dquot(inode);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (i_dquot(inode)[cnt])
+ if (dquots[cnt])
break;
}
@@ -1600,8 +1622,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
{
int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot **dquots = i_dquot(inode);
int reserve = flags & DQUOT_SPACE_RESERVE;
+ struct dquot **dquots;
if (!dquot_active(inode)) {
inode_incr_space(inode, number, reserve);
@@ -1611,6 +1633,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
+ dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1652,13 +1675,14 @@ int dquot_alloc_inode(struct inode *inode)
{
int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot * const *dquots = i_dquot(inode);
+ struct dquot * const *dquots;
if (!dquot_active(inode))
return 0;
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
+ dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1690,6 +1714,7 @@ EXPORT_SYMBOL(dquot_alloc_inode);
*/
int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
{
+ struct dquot **dquots;
int cnt, index;
if (!dquot_active(inode)) {
@@ -1697,18 +1722,18 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
return 0;
}
+ dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (i_dquot(inode)[cnt])
- dquot_claim_reserved_space(i_dquot(inode)[cnt],
- number);
+ if (dquots[cnt])
+ dquot_claim_reserved_space(dquots[cnt], number);
}
/* Update inode bytes */
inode_claim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
- mark_all_dquot_dirty(i_dquot(inode));
+ mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index);
return 0;
}
@@ -1719,6 +1744,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
*/
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
{
+ struct dquot **dquots;
int cnt, index;
if (!dquot_active(inode)) {
@@ -1726,18 +1752,18 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
return;
}
+ dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (i_dquot(inode)[cnt])
- dquot_reclaim_reserved_space(i_dquot(inode)[cnt],
- number);
+ if (dquots[cnt])
+ dquot_reclaim_reserved_space(dquots[cnt], number);
}
/* Update inode bytes */
inode_reclaim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
- mark_all_dquot_dirty(i_dquot(inode));
+ mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index);
return;
}
@@ -1750,7 +1776,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
{
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot **dquots = i_dquot(inode);
+ struct dquot **dquots;
int reserve = flags & DQUOT_SPACE_RESERVE, index;
if (!dquot_active(inode)) {
@@ -1758,6 +1784,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
return;
}
+ dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1793,12 +1820,13 @@ void dquot_free_inode(struct inode *inode)
{
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot * const *dquots = i_dquot(inode);
+ struct dquot * const *dquots;
int index;
if (!dquot_active(inode))
return;
+ dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -2161,7 +2189,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
error = -EROFS;
goto out_fmt;
}
- if (!sb->s_op->quota_write || !sb->s_op->quota_read) {
+ if (!sb->s_op->quota_write || !sb->s_op->quota_read ||
+ (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) {
error = -EINVAL;
goto out_fmt;
}
@@ -2614,55 +2643,73 @@ out:
EXPORT_SYMBOL(dquot_set_dqblk);
/* Generic routine for getting common part of quota file information */
-int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
+int dquot_get_state(struct super_block *sb, struct qc_state *state)
{
struct mem_dqinfo *mi;
+ struct qc_type_state *tstate;
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int type;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!sb_has_quota_active(sb, type)) {
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- return -ESRCH;
+ memset(state, 0, sizeof(*state));
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!sb_has_quota_active(sb, type))
+ continue;
+ tstate = state->s_state + type;
+ mi = sb_dqopt(sb)->info + type;
+ tstate->flags = QCI_ACCT_ENABLED;
+ spin_lock(&dq_data_lock);
+ if (mi->dqi_flags & DQF_SYS_FILE)
+ tstate->flags |= QCI_SYSFILE;
+ if (mi->dqi_flags & DQF_ROOT_SQUASH)
+ tstate->flags |= QCI_ROOT_SQUASH;
+ if (sb_has_quota_limits_enabled(sb, type))
+ tstate->flags |= QCI_LIMITS_ENFORCED;
+ tstate->spc_timelimit = mi->dqi_bgrace;
+ tstate->ino_timelimit = mi->dqi_igrace;
+ tstate->ino = dqopt->files[type]->i_ino;
+ tstate->blocks = dqopt->files[type]->i_blocks;
+ tstate->nextents = 1; /* We don't know... */
+ spin_unlock(&dq_data_lock);
}
- mi = sb_dqopt(sb)->info + type;
- spin_lock(&dq_data_lock);
- ii->dqi_bgrace = mi->dqi_bgrace;
- ii->dqi_igrace = mi->dqi_igrace;
- ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK;
- ii->dqi_valid = IIF_ALL;
- spin_unlock(&dq_data_lock);
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return 0;
}
-EXPORT_SYMBOL(dquot_get_dqinfo);
+EXPORT_SYMBOL(dquot_get_state);
/* Generic routine for setting common part of quota file information */
-int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
+int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii)
{
struct mem_dqinfo *mi;
int err = 0;
+ if ((ii->i_fieldmask & QC_WARNS_MASK) ||
+ (ii->i_fieldmask & QC_RT_SPC_TIMER))
+ return -EINVAL;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
if (!sb_has_quota_active(sb, type)) {
err = -ESRCH;
goto out;
}
mi = sb_dqopt(sb)->info + type;
- if (ii->dqi_valid & IIF_FLAGS) {
- if (ii->dqi_flags & ~DQF_SETINFO_MASK ||
- (ii->dqi_flags & DQF_ROOT_SQUASH &&
+ if (ii->i_fieldmask & QC_FLAGS) {
+ if ((ii->i_flags & QCI_ROOT_SQUASH &&
mi->dqi_format->qf_fmt_id != QFMT_VFS_OLD)) {
err = -EINVAL;
goto out;
}
}
spin_lock(&dq_data_lock);
- if (ii->dqi_valid & IIF_BGRACE)
- mi->dqi_bgrace = ii->dqi_bgrace;
- if (ii->dqi_valid & IIF_IGRACE)
- mi->dqi_igrace = ii->dqi_igrace;
- if (ii->dqi_valid & IIF_FLAGS)
- mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) |
- (ii->dqi_flags & DQF_SETINFO_MASK);
+ if (ii->i_fieldmask & QC_SPC_TIMER)
+ mi->dqi_bgrace = ii->i_spc_timelimit;
+ if (ii->i_fieldmask & QC_INO_TIMER)
+ mi->dqi_igrace = ii->i_ino_timelimit;
+ if (ii->i_fieldmask & QC_FLAGS) {
+ if (ii->i_flags & QCI_ROOT_SQUASH)
+ mi->dqi_flags |= DQF_ROOT_SQUASH;
+ else
+ mi->dqi_flags &= ~DQF_ROOT_SQUASH;
+ }
spin_unlock(&dq_data_lock);
mark_info_dirty(sb, type);
/* Force write to disk */
@@ -2677,7 +2724,7 @@ const struct quotactl_ops dquot_quotactl_ops = {
.quota_on = dquot_quota_on,
.quota_off = dquot_quota_off,
.quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
+ .get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
.set_dqblk = dquot_set_dqblk
@@ -2688,7 +2735,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = {
.quota_enable = dquot_quota_enable,
.quota_disable = dquot_quota_disable,
.quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
+ .get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
.set_dqblk = dquot_set_dqblk
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index d14a799c7785..86ded7375c21 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -118,13 +118,30 @@ static int quota_getfmt(struct super_block *sb, int type, void __user *addr)
static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
{
- struct if_dqinfo info;
+ struct qc_state state;
+ struct qc_type_state *tstate;
+ struct if_dqinfo uinfo;
int ret;
- if (!sb->s_qcop->get_info)
+ /* This checks whether qc_state has enough entries... */
+ BUILD_BUG_ON(MAXQUOTAS > XQM_MAXQUOTAS);
+ if (!sb->s_qcop->get_state)
return -ENOSYS;
- ret = sb->s_qcop->get_info(sb, type, &info);
- if (!ret && copy_to_user(addr, &info, sizeof(info)))
+ ret = sb->s_qcop->get_state(sb, &state);
+ if (ret)
+ return ret;
+ tstate = state.s_state + type;
+ if (!(tstate->flags & QCI_ACCT_ENABLED))
+ return -ESRCH;
+ memset(&uinfo, 0, sizeof(uinfo));
+ uinfo.dqi_bgrace = tstate->spc_timelimit;
+ uinfo.dqi_igrace = tstate->ino_timelimit;
+ if (tstate->flags & QCI_SYSFILE)
+ uinfo.dqi_flags |= DQF_SYS_FILE;
+ if (tstate->flags & QCI_ROOT_SQUASH)
+ uinfo.dqi_flags |= DQF_ROOT_SQUASH;
+ uinfo.dqi_valid = IIF_ALL;
+ if (!ret && copy_to_user(addr, &uinfo, sizeof(uinfo)))
return -EFAULT;
return ret;
}
@@ -132,12 +149,31 @@ static int quota_getinfo(struct super_block *sb, int type, void __user *addr)
static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
{
struct if_dqinfo info;
+ struct qc_info qinfo;
if (copy_from_user(&info, addr, sizeof(info)))
return -EFAULT;
if (!sb->s_qcop->set_info)
return -ENOSYS;
- return sb->s_qcop->set_info(sb, type, &info);
+ if (info.dqi_valid & ~(IIF_FLAGS | IIF_BGRACE | IIF_IGRACE))
+ return -EINVAL;
+ memset(&qinfo, 0, sizeof(qinfo));
+ if (info.dqi_valid & IIF_FLAGS) {
+ if (info.dqi_flags & ~DQF_SETINFO_MASK)
+ return -EINVAL;
+ if (info.dqi_flags & DQF_ROOT_SQUASH)
+ qinfo.i_flags |= QCI_ROOT_SQUASH;
+ qinfo.i_fieldmask |= QC_FLAGS;
+ }
+ if (info.dqi_valid & IIF_BGRACE) {
+ qinfo.i_spc_timelimit = info.dqi_bgrace;
+ qinfo.i_fieldmask |= QC_SPC_TIMER;
+ }
+ if (info.dqi_valid & IIF_IGRACE) {
+ qinfo.i_ino_timelimit = info.dqi_igrace;
+ qinfo.i_fieldmask |= QC_INO_TIMER;
+ }
+ return sb->s_qcop->set_info(sb, type, &qinfo);
}
static inline qsize_t qbtos(qsize_t blocks)
@@ -252,25 +288,149 @@ static int quota_disable(struct super_block *sb, void __user *addr)
return sb->s_qcop->quota_disable(sb, flags);
}
+static int quota_state_to_flags(struct qc_state *state)
+{
+ int flags = 0;
+
+ if (state->s_state[USRQUOTA].flags & QCI_ACCT_ENABLED)
+ flags |= FS_QUOTA_UDQ_ACCT;
+ if (state->s_state[USRQUOTA].flags & QCI_LIMITS_ENFORCED)
+ flags |= FS_QUOTA_UDQ_ENFD;
+ if (state->s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)
+ flags |= FS_QUOTA_GDQ_ACCT;
+ if (state->s_state[GRPQUOTA].flags & QCI_LIMITS_ENFORCED)
+ flags |= FS_QUOTA_GDQ_ENFD;
+ if (state->s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED)
+ flags |= FS_QUOTA_PDQ_ACCT;
+ if (state->s_state[PRJQUOTA].flags & QCI_LIMITS_ENFORCED)
+ flags |= FS_QUOTA_PDQ_ENFD;
+ return flags;
+}
+
+static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
+{
+ int type;
+ struct qc_state state;
+ int ret;
+
+ ret = sb->s_qcop->get_state(sb, &state);
+ if (ret < 0)
+ return ret;
+
+ memset(fqs, 0, sizeof(*fqs));
+ fqs->qs_version = FS_QSTAT_VERSION;
+ fqs->qs_flags = quota_state_to_flags(&state);
+ /* No quota enabled? */
+ if (!fqs->qs_flags)
+ return -ENOSYS;
+ fqs->qs_incoredqs = state.s_incoredqs;
+ /*
+ * GETXSTATE quotactl has space for just one set of time limits so
+ * report them for the first enabled quota type
+ */
+ for (type = 0; type < XQM_MAXQUOTAS; type++)
+ if (state.s_state[type].flags & QCI_ACCT_ENABLED)
+ break;
+ BUG_ON(type == XQM_MAXQUOTAS);
+ fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
+ fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
+ fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
+ fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit;
+ fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit;
+ if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) {
+ fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino;
+ fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks;
+ fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents;
+ }
+ if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) {
+ fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino;
+ fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks;
+ fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents;
+ }
+ if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) {
+ /*
+ * Q_XGETQSTAT doesn't have room for both group and project
+ * quotas. So, allow the project quota values to be copied out
+ * only if there is no group quota information available.
+ */
+ if (!(state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)) {
+ fqs->qs_gquota.qfs_ino = state.s_state[PRJQUOTA].ino;
+ fqs->qs_gquota.qfs_nblks =
+ state.s_state[PRJQUOTA].blocks;
+ fqs->qs_gquota.qfs_nextents =
+ state.s_state[PRJQUOTA].nextents;
+ }
+ }
+ return 0;
+}
+
static int quota_getxstate(struct super_block *sb, void __user *addr)
{
struct fs_quota_stat fqs;
int ret;
- if (!sb->s_qcop->get_xstate)
+ if (!sb->s_qcop->get_state)
return -ENOSYS;
- ret = sb->s_qcop->get_xstate(sb, &fqs);
+ ret = quota_getstate(sb, &fqs);
if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
return -EFAULT;
return ret;
}
+static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
+{
+ int type;
+ struct qc_state state;
+ int ret;
+
+ ret = sb->s_qcop->get_state(sb, &state);
+ if (ret < 0)
+ return ret;
+
+ memset(fqs, 0, sizeof(*fqs));
+ fqs->qs_version = FS_QSTAT_VERSION;
+ fqs->qs_flags = quota_state_to_flags(&state);
+ /* No quota enabled? */
+ if (!fqs->qs_flags)
+ return -ENOSYS;
+ fqs->qs_incoredqs = state.s_incoredqs;
+ /*
+ * GETXSTATV quotactl has space for just one set of time limits so
+ * report them for the first enabled quota type
+ */
+ for (type = 0; type < XQM_MAXQUOTAS; type++)
+ if (state.s_state[type].flags & QCI_ACCT_ENABLED)
+ break;
+ BUG_ON(type == XQM_MAXQUOTAS);
+ fqs->qs_btimelimit = state.s_state[type].spc_timelimit;
+ fqs->qs_itimelimit = state.s_state[type].ino_timelimit;
+ fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
+ fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit;
+ fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit;
+ if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) {
+ fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino;
+ fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks;
+ fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents;
+ }
+ if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) {
+ fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino;
+ fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks;
+ fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents;
+ }
+ if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) {
+ fqs->qs_pquota.qfs_ino = state.s_state[PRJQUOTA].ino;
+ fqs->qs_pquota.qfs_nblks = state.s_state[PRJQUOTA].blocks;
+ fqs->qs_pquota.qfs_nextents = state.s_state[PRJQUOTA].nextents;
+ }
+ return 0;
+}
+
static int quota_getxstatev(struct super_block *sb, void __user *addr)
{
struct fs_quota_statv fqs;
int ret;
- if (!sb->s_qcop->get_xstatev)
+ if (!sb->s_qcop->get_state)
return -ENOSYS;
memset(&fqs, 0, sizeof(fqs));
@@ -284,7 +444,7 @@ static int quota_getxstatev(struct super_block *sb, void __user *addr)
default:
return -EINVAL;
}
- ret = sb->s_qcop->get_xstatev(sb, &fqs);
+ ret = quota_getstatev(sb, &fqs);
if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
return -EFAULT;
return ret;
@@ -357,6 +517,30 @@ static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src)
dst->d_fieldmask |= QC_RT_SPACE;
}
+static void copy_qcinfo_from_xfs_dqblk(struct qc_info *dst,
+ struct fs_disk_quota *src)
+{
+ memset(dst, 0, sizeof(*dst));
+ dst->i_spc_timelimit = src->d_btimer;
+ dst->i_ino_timelimit = src->d_itimer;
+ dst->i_rt_spc_timelimit = src->d_rtbtimer;
+ dst->i_ino_warnlimit = src->d_iwarns;
+ dst->i_spc_warnlimit = src->d_bwarns;
+ dst->i_rt_spc_warnlimit = src->d_rtbwarns;
+ if (src->d_fieldmask & FS_DQ_BWARNS)
+ dst->i_fieldmask |= QC_SPC_WARNS;
+ if (src->d_fieldmask & FS_DQ_IWARNS)
+ dst->i_fieldmask |= QC_INO_WARNS;
+ if (src->d_fieldmask & FS_DQ_RTBWARNS)
+ dst->i_fieldmask |= QC_RT_SPC_WARNS;
+ if (src->d_fieldmask & FS_DQ_BTIMER)
+ dst->i_fieldmask |= QC_SPC_TIMER;
+ if (src->d_fieldmask & FS_DQ_ITIMER)
+ dst->i_fieldmask |= QC_INO_TIMER;
+ if (src->d_fieldmask & FS_DQ_RTBTIMER)
+ dst->i_fieldmask |= QC_RT_SPC_TIMER;
+}
+
static int quota_setxquota(struct super_block *sb, int type, qid_t id,
void __user *addr)
{
@@ -371,6 +555,21 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
qid = make_kqid(current_user_ns(), type, id);
if (!qid_valid(qid))
return -EINVAL;
+ /* Are we actually setting timer / warning limits for all users? */
+ if (from_kqid(&init_user_ns, qid) == 0 &&
+ fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) {
+ struct qc_info qinfo;
+ int ret;
+
+ if (!sb->s_qcop->set_info)
+ return -EINVAL;
+ copy_qcinfo_from_xfs_dqblk(&qinfo, &fdq);
+ ret = sb->s_qcop->set_info(sb, type, &qinfo);
+ if (ret)
+ return ret;
+ /* These are already done */
+ fdq.d_fieldmask &= ~(FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK);
+ }
copy_from_xfs_dqblk(&qdq, &fdq);
return sb->s_qcop->set_dqblk(sb, qid, &qdq);
}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index d65877fbe8f4..58efb83dec1c 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -349,6 +349,13 @@ static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
struct dquot *dquot)
{
int tmp = QT_TREEOFF;
+
+#ifdef __QUOTA_QT_PARANOIA
+ if (info->dqi_blocks <= QT_TREEOFF) {
+ quota_error(dquot->dq_sb, "Quota tree root isn't allocated!");
+ return -EIO;
+ }
+#endif
return do_insert_tree(info, dquot, &tmp, 0);
}
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 9cb10d7197f7..2aa012a68e90 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -117,12 +117,16 @@ static int v2_read_file_info(struct super_block *sb, int type)
qinfo = info->dqi_priv;
if (version == 0) {
/* limits are stored as unsigned 32-bit data */
- info->dqi_max_spc_limit = 0xffffffffULL << QUOTABLOCK_BITS;
+ info->dqi_max_spc_limit = 0xffffffffLL << QUOTABLOCK_BITS;
info->dqi_max_ino_limit = 0xffffffff;
} else {
- /* used space is stored as unsigned 64-bit value in bytes */
- info->dqi_max_spc_limit = 0xffffffffffffffffULL; /* 2^64-1 */
- info->dqi_max_ino_limit = 0xffffffffffffffffULL;
+ /*
+ * Used space is stored as unsigned 64-bit value in bytes but
+ * quota core supports only signed 64-bit values so use that
+ * as a limit
+ */
+ info->dqi_max_spc_limit = 0x7fffffffffffffffLL; /* 2^63-1 */
+ info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
}
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h
index f1966b42c2fd..4e95430093d9 100644
--- a/fs/quota/quotaio_v2.h
+++ b/fs/quota/quotaio_v2.h
@@ -13,12 +13,14 @@
*/
#define V2_INITQMAGICS {\
0xd9c01f11, /* USRQUOTA */\
- 0xd9c01927 /* GRPQUOTA */\
+ 0xd9c01927, /* GRPQUOTA */\
+ 0xd9c03f14, /* PRJQUOTA */\
}
#define V2_INITQVERSIONS {\
1, /* USRQUOTA */\
- 1 /* GRPQUOTA */\
+ 1, /* GRPQUOTA */\
+ 1, /* PRJQUOTA */\
}
/* First generic header */
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 4f56de822d2f..183a212694bf 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -31,9 +31,7 @@
#include "internal.h"
const struct file_operations ramfs_file_operations = {
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = noop_fsync,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index f6ab41b39612..0b38befa69f3 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -44,9 +44,7 @@ const struct file_operations ramfs_file_operations = {
.mmap_capabilities = ramfs_mmap_capabilities,
.mmap = ramfs_nommu_mmap,
.get_unmapped_area = ramfs_nommu_get_unmapped_area,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
diff --git a/fs/read_write.c b/fs/read_write.c
index 8e1b68786d66..819ef3faf1bb 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
-#include <linux/aio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/export.h>
@@ -23,13 +22,10 @@
#include <asm/unistd.h>
typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
-typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
- unsigned long, loff_t);
typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
const struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
.mmap = generic_file_readonly_mmap,
.splice_read = generic_file_splice_read,
@@ -343,13 +339,10 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = iov_iter_count(iter);
iter->type |= READ;
ret = file->f_op->read_iter(&kiocb, iter);
- if (ret == -EIOCBQUEUED)
- ret = wait_on_sync_kiocb(&kiocb);
-
+ BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
*ppos = kiocb.ki_pos;
return ret;
@@ -366,13 +359,10 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = iov_iter_count(iter);
iter->type |= WRITE;
ret = file->f_op->write_iter(&kiocb, iter);
- if (ret == -EIOCBQUEUED)
- ret = wait_on_sync_kiocb(&kiocb);
-
+ BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
*ppos = kiocb.ki_pos;
return ret;
@@ -418,26 +408,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
}
-ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = buf, .iov_len = len };
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
-
- ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- *ppos = kiocb.ki_pos;
- return ret;
-}
-
-EXPORT_SYMBOL(do_sync_read);
-
-ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
+static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
@@ -446,34 +417,25 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
iov_iter_init(&iter, READ, &iov, 1, len);
ret = filp->f_op->read_iter(&kiocb, &iter);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
+ BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
-EXPORT_SYMBOL(new_sync_read);
-
ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
loff_t *pos)
{
- ssize_t ret;
-
if (file->f_op->read)
- ret = file->f_op->read(file, buf, count, pos);
- else if (file->f_op->aio_read)
- ret = do_sync_read(file, buf, count, pos);
+ return file->f_op->read(file, buf, count, pos);
else if (file->f_op->read_iter)
- ret = new_sync_read(file, buf, count, pos);
+ return new_sync_read(file, buf, count, pos);
else
- ret = -EINVAL;
-
- return ret;
+ return -EINVAL;
}
+EXPORT_SYMBOL(__vfs_read);
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
@@ -502,26 +464,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
EXPORT_SYMBOL(vfs_read);
-ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
-{
- struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
- struct kiocb kiocb;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
-
- ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- *ppos = kiocb.ki_pos;
- return ret;
-}
-
-EXPORT_SYMBOL(do_sync_write);
-
-ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
+static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
struct kiocb kiocb;
@@ -530,17 +473,26 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
iov_iter_init(&iter, WRITE, &iov, 1, len);
ret = filp->f_op->write_iter(&kiocb, &iter);
- if (-EIOCBQUEUED == ret)
- ret = wait_on_sync_kiocb(&kiocb);
- *ppos = kiocb.ki_pos;
+ BUG_ON(ret == -EIOCBQUEUED);
+ if (ret > 0)
+ *ppos = kiocb.ki_pos;
return ret;
}
-EXPORT_SYMBOL(new_sync_write);
+ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
+ loff_t *pos)
+{
+ if (file->f_op->write)
+ return file->f_op->write(file, p, count, pos);
+ else if (file->f_op->write_iter)
+ return new_sync_write(file, p, count, pos);
+ else
+ return -EINVAL;
+}
+EXPORT_SYMBOL(__vfs_write);
ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
{
@@ -556,12 +508,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
p = (__force const char __user *)buf;
if (count > MAX_RW_COUNT)
count = MAX_RW_COUNT;
- if (file->f_op->write)
- ret = file->f_op->write(file, p, count, pos);
- else if (file->f_op->aio_write)
- ret = do_sync_write(file, p, count, pos);
- else
- ret = new_sync_write(file, p, count, pos);
+ ret = __vfs_write(file, p, count, pos);
set_fs(old_fs);
if (ret > 0) {
fsnotify_modify(file);
@@ -588,12 +535,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
if (ret >= 0) {
count = ret;
file_start_write(file);
- if (file->f_op->write)
- ret = file->f_op->write(file, buf, count, pos);
- else if (file->f_op->aio_write)
- ret = do_sync_write(file, buf, count, pos);
- else
- ret = new_sync_write(file, buf, count, pos);
+ ret = __vfs_write(file, buf, count, pos);
if (ret > 0) {
fsnotify_modify(file);
add_wchar(current, ret);
@@ -710,60 +652,32 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
}
EXPORT_SYMBOL(iov_shorten);
-static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
- unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
-{
- struct kiocb kiocb;
- struct iov_iter iter;
- ssize_t ret;
-
- init_sync_kiocb(&kiocb, filp);
- kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
-
- iov_iter_init(&iter, rw, iov, nr_segs, len);
- ret = fn(&kiocb, &iter);
- if (ret == -EIOCBQUEUED)
- ret = wait_on_sync_kiocb(&kiocb);
- *ppos = kiocb.ki_pos;
- return ret;
-}
-
-static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
+static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
+ loff_t *ppos, iter_fn_t fn)
{
struct kiocb kiocb;
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
- kiocb.ki_nbytes = len;
- ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
- if (ret == -EIOCBQUEUED)
- ret = wait_on_sync_kiocb(&kiocb);
+ ret = fn(&kiocb, iter);
+ BUG_ON(ret == -EIOCBQUEUED);
*ppos = kiocb.ki_pos;
return ret;
}
/* Do it by hand, with file-ops */
-static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
+static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
+ loff_t *ppos, io_fn_t fn)
{
- struct iovec *vector = iov;
ssize_t ret = 0;
- while (nr_segs > 0) {
- void __user *base;
- size_t len;
+ while (iov_iter_count(iter)) {
+ struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
- base = vector->iov_base;
- len = vector->iov_len;
- vector++;
- nr_segs--;
-
- nr = fn(filp, base, len, ppos);
+ nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
if (nr < 0) {
if (!ret)
@@ -771,8 +685,9 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
break;
}
ret += nr;
- if (nr != len)
+ if (nr != iovec.iov_len)
break;
+ iov_iter_advance(iter, nr);
}
return ret;
@@ -863,48 +778,42 @@ static ssize_t do_readv_writev(int type, struct file *file,
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
+ struct iov_iter iter;
ssize_t ret;
io_fn_t fn;
- iov_fn_t fnv;
iter_fn_t iter_fn;
- ret = rw_copy_check_uvector(type, uvector, nr_segs,
- ARRAY_SIZE(iovstack), iovstack, &iov);
- if (ret <= 0)
- goto out;
+ ret = import_iovec(type, uvector, nr_segs,
+ ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret < 0)
+ return ret;
- tot_len = ret;
+ tot_len = iov_iter_count(&iter);
+ if (!tot_len)
+ goto out;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
- fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->aio_read;
iter_fn = file->f_op->read_iter;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->aio_write;
iter_fn = file->f_op->write_iter;
file_start_write(file);
}
if (iter_fn)
- ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
- pos, iter_fn);
- else if (fnv)
- ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
- pos, fnv);
+ ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
else
- ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, fn);
if (type != READ)
file_end_write(file);
out:
- if (iov != iovstack)
- kfree(iov);
+ kfree(iov);
if ((ret + (type == READ)) > 0) {
if (type == READ)
fsnotify_access(file);
@@ -1043,48 +952,42 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
+ struct iov_iter iter;
ssize_t ret;
io_fn_t fn;
- iov_fn_t fnv;
iter_fn_t iter_fn;
- ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
- UIO_FASTIOV, iovstack, &iov);
- if (ret <= 0)
- goto out;
+ ret = compat_import_iovec(type, uvector, nr_segs,
+ UIO_FASTIOV, &iov, &iter);
+ if (ret < 0)
+ return ret;
- tot_len = ret;
+ tot_len = iov_iter_count(&iter);
+ if (!tot_len)
+ goto out;
ret = rw_verify_area(type, file, pos, tot_len);
if (ret < 0)
goto out;
- fnv = NULL;
if (type == READ) {
fn = file->f_op->read;
- fnv = file->f_op->aio_read;
iter_fn = file->f_op->read_iter;
} else {
fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->aio_write;
iter_fn = file->f_op->write_iter;
file_start_write(file);
}
if (iter_fn)
- ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
- pos, iter_fn);
- else if (fnv)
- ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
- pos, fnv);
+ ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
else
- ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, fn);
if (type != READ)
file_end_write(file);
out:
- if (iov != iovstack)
- kfree(iov);
+ kfree(iov);
if ((ret + (type == READ)) > 0) {
if (type == READ)
fsnotify_access(file);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 751dd3f4346b..96a1bcf33db4 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -243,8 +243,6 @@ drop_write_lock:
}
const struct file_operations reiserfs_file_operations = {
- .read = new_sync_read,
- .write = new_sync_write,
.unlocked_ioctl = reiserfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = reiserfs_compat_ioctl,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index e72401e1f995..742242b60972 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,7 +18,7 @@
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
@@ -3278,22 +3278,22 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
* We thank Mingming Cao for helping us understand in great detail what
* to do in this section of the code.
*/
-static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
size_t count = iov_iter_count(iter);
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+ ret = blockdev_direct_IO(iocb, inode, iter, offset,
reiserfs_get_blocks_direct_io);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
*/
- if (unlikely((rw & WRITE) && ret < 0)) {
+ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
loff_t isize = i_size_read(inode);
loff_t end = offset + count;
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index bb79cddf0a1f..2adcde137c3f 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -910,7 +910,6 @@ do { \
if (!(cond)) \
reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \
__FILE__ ":%i:%s: " format "\n", \
- in_interrupt() ? -1 : task_pid_nr(current), \
__LINE__, __func__ , ##args); \
} while (0)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 71fbbe3e2dab..68b5f182984e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -805,7 +805,7 @@ static const struct quotactl_ops reiserfs_qctl_operations = {
.quota_on = reiserfs_quota_on,
.quota_off = dquot_quota_off,
.quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
+ .get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
.set_dqblk = dquot_set_dqblk,
diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c
index 7da9e2153953..1118a0dc6b45 100644
--- a/fs/romfs/mmap-nommu.c
+++ b/fs/romfs/mmap-nommu.c
@@ -81,7 +81,6 @@ static unsigned romfs_mmap_capabilities(struct file *file)
const struct file_operations romfs_ro_fops = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.mmap = romfs_mmap,
diff --git a/fs/splice.c b/fs/splice.c
index 7968da96bebb..476024bb6546 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,7 +32,6 @@
#include <linux/gfp.h>
#include <linux/socket.h>
#include <linux/compat.h>
-#include <linux/aio.h>
#include "internal.h"
/*
@@ -524,6 +523,9 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
loff_t isize, left;
int ret;
+ if (IS_DAX(in->f_mapping->host))
+ return default_file_splice_read(in, ppos, pipe, len, flags);
+
isize = i_size_read(in->f_mapping->host);
if (unlikely(*ppos >= isize))
return 0;
@@ -1534,34 +1536,29 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- ssize_t count;
pipe = get_pipe_info(file);
if (!pipe)
return -EBADF;
- ret = rw_copy_check_uvector(READ, uiov, nr_segs,
- ARRAY_SIZE(iovstack), iovstack, &iov);
- if (ret <= 0)
- goto out;
-
- count = ret;
- iov_iter_init(&iter, READ, iov, nr_segs, count);
+ ret = import_iovec(READ, uiov, nr_segs,
+ ARRAY_SIZE(iovstack), &iov, &iter);
+ if (ret < 0)
+ return ret;
+ sd.total_len = iov_iter_count(&iter);
sd.len = 0;
- sd.total_len = count;
sd.flags = flags;
sd.u.data = &iter;
sd.pos = 0;
- pipe_lock(pipe);
- ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
- pipe_unlock(pipe);
-
-out:
- if (iov != iovstack)
- kfree(iov);
+ if (sd.total_len) {
+ pipe_lock(pipe);
+ ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
+ pipe_unlock(pipe);
+ }
+ kfree(iov);
return ret;
}
diff --git a/fs/stat.c b/fs/stat.c
index ae0c3cef9927..19636af5e75c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -66,7 +66,7 @@ int vfs_getattr(struct path *path, struct kstat *stat)
{
int retval;
- retval = security_inode_getattr(path->mnt, path->dentry);
+ retval = security_inode_getattr(path);
if (retval)
return retval;
return vfs_getattr_nosec(path, stat);
diff --git a/fs/super.c b/fs/super.c
index 2b7dc90ccdbb..928c20f47af9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -224,7 +224,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
- s->cleancache_poolid = -1;
+ s->cleancache_poolid = CLEANCACHE_NO_POOL;
s->s_shrink.seeks = DEFAULT_SEEKS;
s->s_shrink.scan_objects = super_cache_scan;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2554d8835b48..b400c04371f0 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -41,7 +41,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
if (grp->attrs) {
for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
- umode_t mode = 0;
+ umode_t mode = (*attr)->mode;
/*
* In update mode, we're changing the permissions or
@@ -55,9 +55,14 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
if (!mode)
continue;
}
+
+ WARN(mode & ~(SYSFS_PREALLOC | 0664),
+ "Attribute %s: Invalid permissions 0%o\n",
+ (*attr)->name, mode);
+
+ mode &= SYSFS_PREALLOC | 0664;
error = sysfs_add_file_mode_ns(parent, *attr, false,
- (*attr)->mode | mode,
- NULL);
+ mode, NULL);
if (unlikely(error))
break;
}
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index b00811c75b24..a48e30410ad1 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -21,9 +21,7 @@
*/
const struct file_operations sysv_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = generic_file_fsync,
diff --git a/fs/tracefs/Makefile b/fs/tracefs/Makefile
new file mode 100644
index 000000000000..82fa35b656c4
--- /dev/null
+++ b/fs/tracefs/Makefile
@@ -0,0 +1,4 @@
+tracefs-objs := inode.o
+
+obj-$(CONFIG_TRACING) += tracefs.o
+
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
new file mode 100644
index 000000000000..d92bdf3b079a
--- /dev/null
+++ b/fs/tracefs/inode.c
@@ -0,0 +1,650 @@
+/*
+ * inode.c - part of tracefs, a pseudo file system for activating tracing
+ *
+ * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
+ *
+ * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * tracefs is the file system that is used by the tracing infrastructure.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/kobject.h>
+#include <linux/namei.h>
+#include <linux/tracefs.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+#include <linux/parser.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+
+#define TRACEFS_DEFAULT_MODE 0700
+
+static struct vfsmount *tracefs_mount;
+static int tracefs_mount_count;
+static bool tracefs_registered;
+
+static ssize_t default_read_file(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return 0;
+}
+
+static ssize_t default_write_file(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return count;
+}
+
+static const struct file_operations tracefs_file_operations = {
+ .read = default_read_file,
+ .write = default_write_file,
+ .open = simple_open,
+ .llseek = noop_llseek,
+};
+
+static struct tracefs_dir_ops {
+ int (*mkdir)(const char *name);
+ int (*rmdir)(const char *name);
+} tracefs_ops;
+
+static char *get_dname(struct dentry *dentry)
+{
+ const char *dname;
+ char *name;
+ int len = dentry->d_name.len;
+
+ dname = dentry->d_name.name;
+ name = kmalloc(len + 1, GFP_KERNEL);
+ if (!name)
+ return NULL;
+ memcpy(name, dname, len);
+ name[len] = 0;
+ return name;
+}
+
+static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
+{
+ char *name;
+ int ret;
+
+ name = get_dname(dentry);
+ if (!name)
+ return -ENOMEM;
+
+ /*
+ * The mkdir call can call the generic functions that create
+ * the files within the tracefs system. It is up to the individual
+ * mkdir routine to handle races.
+ */
+ mutex_unlock(&inode->i_mutex);
+ ret = tracefs_ops.mkdir(name);
+ mutex_lock(&inode->i_mutex);
+
+ kfree(name);
+
+ return ret;
+}
+
+static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
+{
+ char *name;
+ int ret;
+
+ name = get_dname(dentry);
+ if (!name)
+ return -ENOMEM;
+
+ /*
+ * The rmdir call can call the generic functions that create
+ * the files within the tracefs system. It is up to the individual
+ * rmdir routine to handle races.
+ * This time we need to unlock not only the parent (inode) but
+ * also the directory that is being deleted.
+ */
+ mutex_unlock(&inode->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
+
+ ret = tracefs_ops.rmdir(name);
+
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock(&dentry->d_inode->i_mutex);
+
+ kfree(name);
+
+ return ret;
+}
+
+static const struct inode_operations tracefs_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .mkdir = tracefs_syscall_mkdir,
+ .rmdir = tracefs_syscall_rmdir,
+};
+
+static struct inode *tracefs_get_inode(struct super_block *sb)
+{
+ struct inode *inode = new_inode(sb);
+ if (inode) {
+ inode->i_ino = get_next_ino();
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ }
+ return inode;
+}
+
+struct tracefs_mount_opts {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+};
+
+enum {
+ Opt_uid,
+ Opt_gid,
+ Opt_mode,
+ Opt_err
+};
+
+static const match_table_t tokens = {
+ {Opt_uid, "uid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_mode, "mode=%o"},
+ {Opt_err, NULL}
+};
+
+struct tracefs_fs_info {
+ struct tracefs_mount_opts mount_opts;
+};
+
+static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
+{
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+ int token;
+ kuid_t uid;
+ kgid_t gid;
+ char *p;
+
+ opts->mode = TRACEFS_DEFAULT_MODE;
+
+ while ((p = strsep(&data, ",")) != NULL) {
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_uid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(uid))
+ return -EINVAL;
+ opts->uid = uid;
+ break;
+ case Opt_gid:
+ if (match_int(&args[0], &option))
+ return -EINVAL;
+ gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(gid))
+ return -EINVAL;
+ opts->gid = gid;
+ break;
+ case Opt_mode:
+ if (match_octal(&args[0], &option))
+ return -EINVAL;
+ opts->mode = option & S_IALLUGO;
+ break;
+ /*
+ * We might like to report bad mount options here;
+ * but traditionally tracefs has ignored all mount options
+ */
+ }
+ }
+
+ return 0;
+}
+
+static int tracefs_apply_options(struct super_block *sb)
+{
+ struct tracefs_fs_info *fsi = sb->s_fs_info;
+ struct inode *inode = sb->s_root->d_inode;
+ struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+ inode->i_mode &= ~S_IALLUGO;
+ inode->i_mode |= opts->mode;
+
+ inode->i_uid = opts->uid;
+ inode->i_gid = opts->gid;
+
+ return 0;
+}
+
+static int tracefs_remount(struct super_block *sb, int *flags, char *data)
+{
+ int err;
+ struct tracefs_fs_info *fsi = sb->s_fs_info;
+
+ sync_filesystem(sb);
+ err = tracefs_parse_options(data, &fsi->mount_opts);
+ if (err)
+ goto fail;
+
+ tracefs_apply_options(sb);
+
+fail:
+ return err;
+}
+
+static int tracefs_show_options(struct seq_file *m, struct dentry *root)
+{
+ struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
+ struct tracefs_mount_opts *opts = &fsi->mount_opts;
+
+ if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
+ seq_printf(m, ",uid=%u",
+ from_kuid_munged(&init_user_ns, opts->uid));
+ if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
+ seq_printf(m, ",gid=%u",
+ from_kgid_munged(&init_user_ns, opts->gid));
+ if (opts->mode != TRACEFS_DEFAULT_MODE)
+ seq_printf(m, ",mode=%o", opts->mode);
+
+ return 0;
+}
+
+static const struct super_operations tracefs_super_operations = {
+ .statfs = simple_statfs,
+ .remount_fs = tracefs_remount,
+ .show_options = tracefs_show_options,
+};
+
+static int trace_fill_super(struct super_block *sb, void *data, int silent)
+{
+ static struct tree_descr trace_files[] = {{""}};
+ struct tracefs_fs_info *fsi;
+ int err;
+
+ save_mount_options(sb, data);
+
+ fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
+ sb->s_fs_info = fsi;
+ if (!fsi) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ err = tracefs_parse_options(data, &fsi->mount_opts);
+ if (err)
+ goto fail;
+
+ err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
+ if (err)
+ goto fail;
+
+ sb->s_op = &tracefs_super_operations;
+
+ tracefs_apply_options(sb);
+
+ return 0;
+
+fail:
+ kfree(fsi);
+ sb->s_fs_info = NULL;
+ return err;
+}
+
+static struct dentry *trace_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return mount_single(fs_type, flags, data, trace_fill_super);
+}
+
+static struct file_system_type trace_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "tracefs",
+ .mount = trace_mount,
+ .kill_sb = kill_litter_super,
+};
+MODULE_ALIAS_FS("tracefs");
+
+static struct dentry *start_creating(const char *name, struct dentry *parent)
+{
+ struct dentry *dentry;
+ int error;
+
+ pr_debug("tracefs: creating file '%s'\n",name);
+
+ error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
+ &tracefs_mount_count);
+ if (error)
+ return ERR_PTR(error);
+
+ /* If the parent is not specified, we create it in the root.
+ * We need the root dentry to do this, which is in the super
+ * block. A pointer to that is in the struct vfsmount that we
+ * have around.
+ */
+ if (!parent)
+ parent = tracefs_mount->mnt_root;
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ dentry = lookup_one_len(name, parent, strlen(name));
+ if (!IS_ERR(dentry) && dentry->d_inode) {
+ dput(dentry);
+ dentry = ERR_PTR(-EEXIST);
+ }
+ if (IS_ERR(dentry))
+ mutex_unlock(&parent->d_inode->i_mutex);
+ return dentry;
+}
+
+static struct dentry *failed_creating(struct dentry *dentry)
+{
+ mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ dput(dentry);
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+ return NULL;
+}
+
+static struct dentry *end_creating(struct dentry *dentry)
+{
+ mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ return dentry;
+}
+
+/**
+ * tracefs_create_file - create a file in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is NULL, then the
+ * file will be created in the root of the tracefs filesystem.
+ * @data: a pointer to something that the caller will want to get to later
+ * on. The inode.i_private pointer will point to this value on
+ * the open() call.
+ * @fops: a pointer to a struct file_operations that should be used for
+ * this file.
+ *
+ * This is the basic "create a file" function for tracefs. It allows for a
+ * wide range of flexibility in creating a file, or a directory (if you want
+ * to create a directory, the tracefs_create_dir() function is
+ * recommended to be used instead.)
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed (no automatic cleanup happens if your module is unloaded,
+ * you are responsible here.) If an error occurs, %NULL will be returned.
+ *
+ * If tracefs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *tracefs_create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+
+ if (!(mode & S_IFMT))
+ mode |= S_IFREG;
+ BUG_ON(!S_ISREG(mode));
+ dentry = start_creating(name, parent);
+
+ if (IS_ERR(dentry))
+ return NULL;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ return failed_creating(dentry);
+
+ inode->i_mode = mode;
+ inode->i_fop = fops ? fops : &tracefs_file_operations;
+ inode->i_private = data;
+ d_instantiate(dentry, inode);
+ fsnotify_create(dentry->d_parent->d_inode, dentry);
+ return end_creating(dentry);
+}
+
+static struct dentry *__create_dir(const char *name, struct dentry *parent,
+ const struct inode_operations *ops)
+{
+ struct dentry *dentry = start_creating(name, parent);
+ struct inode *inode;
+
+ if (IS_ERR(dentry))
+ return NULL;
+
+ inode = tracefs_get_inode(dentry->d_sb);
+ if (unlikely(!inode))
+ return failed_creating(dentry);
+
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ inode->i_op = ops;
+ inode->i_fop = &simple_dir_operations;
+
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inc_nlink(inode);
+ d_instantiate(dentry, inode);
+ inc_nlink(dentry->d_parent->d_inode);
+ fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+ return end_creating(dentry);
+}
+
+/**
+ * tracefs_create_dir - create a directory in the tracefs filesystem
+ * @name: a pointer to a string containing the name of the directory to
+ * create.
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is NULL, then the
+ * directory will be created in the root of the tracefs filesystem.
+ *
+ * This function creates a directory in tracefs with the given name.
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the tracefs_remove() function when the file is
+ * to be removed. If an error occurs, %NULL will be returned.
+ *
+ * If tracing is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
+{
+ return __create_dir(name, parent, &simple_dir_inode_operations);
+}
+
+/**
+ * tracefs_create_instance_dir - create the tracing instances directory
+ * @name: The name of the instances directory to create
+ * @parent: The parent directory that the instances directory will exist
+ * @mkdir: The function to call when a mkdir is performed.
+ * @rmdir: The function to call when a rmdir is performed.
+ *
+ * Only one instances directory is allowed.
+ *
+ * The instances directory is special as it allows for mkdir and rmdir to
+ * to be done by userspace. When a mkdir or rmdir is performed, the inode
+ * locks are released and the methhods passed in (@mkdir and @rmdir) are
+ * called without locks and with the name of the directory being created
+ * within the instances directory.
+ *
+ * Returns the dentry of the instances directory.
+ */
+struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
+ int (*mkdir)(const char *name),
+ int (*rmdir)(const char *name))
+{
+ struct dentry *dentry;
+
+ /* Only allow one instance of the instances directory. */
+ if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
+ return NULL;
+
+ dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
+ if (!dentry)
+ return NULL;
+
+ tracefs_ops.mkdir = mkdir;
+ tracefs_ops.rmdir = rmdir;
+
+ return dentry;
+}
+
+static inline int tracefs_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
+static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
+{
+ int ret = 0;
+
+ if (tracefs_positive(dentry)) {
+ if (dentry->d_inode) {
+ dget(dentry);
+ switch (dentry->d_inode->i_mode & S_IFMT) {
+ case S_IFDIR:
+ ret = simple_rmdir(parent->d_inode, dentry);
+ break;
+ default:
+ simple_unlink(parent->d_inode, dentry);
+ break;
+ }
+ if (!ret)
+ d_delete(dentry);
+ dput(dentry);
+ }
+ }
+ return ret;
+}
+
+/**
+ * tracefs_remove - removes a file or directory from the tracefs filesystem
+ * @dentry: a pointer to a the dentry of the file or directory to be
+ * removed.
+ *
+ * This function removes a file or directory in tracefs that was previously
+ * created with a call to another tracefs function (like
+ * tracefs_create_file() or variants thereof.)
+ */
+void tracefs_remove(struct dentry *dentry)
+{
+ struct dentry *parent;
+ int ret;
+
+ if (IS_ERR_OR_NULL(dentry))
+ return;
+
+ parent = dentry->d_parent;
+ if (!parent || !parent->d_inode)
+ return;
+
+ mutex_lock(&parent->d_inode->i_mutex);
+ ret = __tracefs_remove(dentry, parent);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ if (!ret)
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+}
+
+/**
+ * tracefs_remove_recursive - recursively removes a directory
+ * @dentry: a pointer to a the dentry of the directory to be removed.
+ *
+ * This function recursively removes a directory tree in tracefs that
+ * was previously created with a call to another tracefs function
+ * (like tracefs_create_file() or variants thereof.)
+ */
+void tracefs_remove_recursive(struct dentry *dentry)
+{
+ struct dentry *child, *parent;
+
+ if (IS_ERR_OR_NULL(dentry))
+ return;
+
+ parent = dentry->d_parent;
+ if (!parent || !parent->d_inode)
+ return;
+
+ parent = dentry;
+ down:
+ mutex_lock(&parent->d_inode->i_mutex);
+ loop:
+ /*
+ * The parent->d_subdirs is protected by the d_lock. Outside that
+ * lock, the child can be unlinked and set to be freed which can
+ * use the d_u.d_child as the rcu head and corrupt this list.
+ */
+ spin_lock(&parent->d_lock);
+ list_for_each_entry(child, &parent->d_subdirs, d_child) {
+ if (!tracefs_positive(child))
+ continue;
+
+ /* perhaps simple_empty(child) makes more sense */
+ if (!list_empty(&child->d_subdirs)) {
+ spin_unlock(&parent->d_lock);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ parent = child;
+ goto down;
+ }
+
+ spin_unlock(&parent->d_lock);
+
+ if (!__tracefs_remove(child, parent))
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+
+ /*
+ * The parent->d_lock protects agaist child from unlinking
+ * from d_subdirs. When releasing the parent->d_lock we can
+ * no longer trust that the next pointer is valid.
+ * Restart the loop. We'll skip this one with the
+ * tracefs_positive() check.
+ */
+ goto loop;
+ }
+ spin_unlock(&parent->d_lock);
+
+ mutex_unlock(&parent->d_inode->i_mutex);
+ child = parent;
+ parent = parent->d_parent;
+ mutex_lock(&parent->d_inode->i_mutex);
+
+ if (child != dentry)
+ /* go up */
+ goto loop;
+
+ if (!__tracefs_remove(child, parent))
+ simple_release_fs(&tracefs_mount, &tracefs_mount_count);
+ mutex_unlock(&parent->d_inode->i_mutex);
+}
+
+/**
+ * tracefs_initialized - Tells whether tracefs has been registered
+ */
+bool tracefs_initialized(void)
+{
+ return tracefs_registered;
+}
+
+static struct kobject *trace_kobj;
+
+static int __init tracefs_init(void)
+{
+ int retval;
+
+ trace_kobj = kobject_create_and_add("tracing", kernel_kobj);
+ if (!trace_kobj)
+ return -EINVAL;
+
+ retval = register_filesystem(&trace_fs_type);
+ if (!retval)
+ tracefs_registered = true;
+
+ return retval;
+}
+core_initcall(tracefs_init);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eb997e9c4ab0..11a11b32a2a9 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -509,7 +509,7 @@ again:
c->bi.nospace_rp = 1;
smp_wmb();
} else
- ubifs_err("cannot budget space, error %d", err);
+ ubifs_err(c, "cannot budget space, error %d", err);
return err;
}
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 26b69b2d4a45..63f56619991d 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -225,7 +225,7 @@ out_cancel:
out_up:
up_write(&c->commit_sem);
out:
- ubifs_err("commit failed, error %d", err);
+ ubifs_err(c, "commit failed, error %d", err);
spin_lock(&c->cs_lock);
c->cmt_state = COMMIT_BROKEN;
wake_up(&c->cmt_wq);
@@ -289,7 +289,7 @@ int ubifs_bg_thread(void *info)
int err;
struct ubifs_info *c = info;
- ubifs_msg("background thread \"%s\" started, PID %d",
+ ubifs_msg(c, "background thread \"%s\" started, PID %d",
c->bgt_name, current->pid);
set_freezable();
@@ -324,7 +324,7 @@ int ubifs_bg_thread(void *info)
cond_resched();
}
- ubifs_msg("background thread \"%s\" stops", c->bgt_name);
+ ubifs_msg(c, "background thread \"%s\" stops", c->bgt_name);
return 0;
}
@@ -712,13 +712,13 @@ out:
return 0;
out_dump:
- ubifs_err("dumping index node (iip=%d)", i->iip);
+ ubifs_err(c, "dumping index node (iip=%d)", i->iip);
ubifs_dump_node(c, idx);
list_del(&i->list);
kfree(i);
if (!list_empty(&list)) {
i = list_entry(list.prev, struct idx_node, list);
- ubifs_err("dumping parent index node");
+ ubifs_err(c, "dumping parent index node");
ubifs_dump_node(c, &i->idx);
}
out_free:
@@ -727,7 +727,7 @@ out_free:
list_del(&i->list);
kfree(i);
}
- ubifs_err("failed, error %d", err);
+ ubifs_err(c, "failed, error %d", err);
if (err > 0)
err = -EINVAL;
return err;
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
index 2bfa0953335d..565cb56d7225 100644
--- a/fs/ubifs/compress.c
+++ b/fs/ubifs/compress.c
@@ -92,8 +92,8 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
* Note, if the input buffer was not compressed, it is copied to the output
* buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
*/
-void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
- int *compr_type)
+void ubifs_compress(const struct ubifs_info *c, const void *in_buf,
+ int in_len, void *out_buf, int *out_len, int *compr_type)
{
int err;
struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
@@ -112,9 +112,9 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
if (compr->comp_mutex)
mutex_unlock(compr->comp_mutex);
if (unlikely(err)) {
- ubifs_warn("cannot compress %d bytes, compressor %s, error %d, leave data uncompressed",
+ ubifs_warn(c, "cannot compress %d bytes, compressor %s, error %d, leave data uncompressed",
in_len, compr->name, err);
- goto no_compr;
+ goto no_compr;
}
/*
@@ -144,21 +144,21 @@ no_compr:
* The length of the uncompressed data is returned in @out_len. This functions
* returns %0 on success or a negative error code on failure.
*/
-int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
- int *out_len, int compr_type)
+int ubifs_decompress(const struct ubifs_info *c, const void *in_buf,
+ int in_len, void *out_buf, int *out_len, int compr_type)
{
int err;
struct ubifs_compressor *compr;
if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
- ubifs_err("invalid compression type %d", compr_type);
+ ubifs_err(c, "invalid compression type %d", compr_type);
return -EINVAL;
}
compr = ubifs_compressors[compr_type];
if (unlikely(!compr->capi_name)) {
- ubifs_err("%s compression is not compiled in", compr->name);
+ ubifs_err(c, "%s compression is not compiled in", compr->name);
return -EINVAL;
}
@@ -175,7 +175,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
if (compr->decomp_mutex)
mutex_unlock(compr->decomp_mutex);
if (err)
- ubifs_err("cannot decompress %d bytes, compressor %s, error %d",
+ ubifs_err(c, "cannot decompress %d bytes, compressor %s, error %d",
in_len, compr->name, err);
return err;
@@ -193,8 +193,8 @@ static int __init compr_init(struct ubifs_compressor *compr)
if (compr->capi_name) {
compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0);
if (IS_ERR(compr->cc)) {
- ubifs_err("cannot initialize compressor %s, error %ld",
- compr->name, PTR_ERR(compr->cc));
+ pr_err("UBIFS error (pid %d): cannot initialize compressor %s, error %ld",
+ current->pid, compr->name, PTR_ERR(compr->cc));
return PTR_ERR(compr->cc);
}
}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 4cfb3e82c56f..4c46a9865fa7 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -746,7 +746,7 @@ void ubifs_dump_lprops(struct ubifs_info *c)
for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
err = ubifs_read_one_lp(c, lnum, &lp);
if (err) {
- ubifs_err("cannot read lprops for LEB %d", lnum);
+ ubifs_err(c, "cannot read lprops for LEB %d", lnum);
continue;
}
@@ -819,13 +819,13 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
if (!buf) {
- ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
+ ubifs_err(c, "cannot allocate memory for dumping LEB %d", lnum);
return;
}
sleb = ubifs_scan(c, lnum, 0, buf, 0);
if (IS_ERR(sleb)) {
- ubifs_err("scan error %d", (int)PTR_ERR(sleb));
+ ubifs_err(c, "scan error %d", (int)PTR_ERR(sleb));
goto out;
}
@@ -1032,7 +1032,7 @@ int dbg_check_space_info(struct ubifs_info *c)
spin_unlock(&c->space_lock);
if (free != d->saved_free) {
- ubifs_err("free space changed from %lld to %lld",
+ ubifs_err(c, "free space changed from %lld to %lld",
d->saved_free, free);
goto out;
}
@@ -1040,15 +1040,15 @@ int dbg_check_space_info(struct ubifs_info *c)
return 0;
out:
- ubifs_msg("saved lprops statistics dump");
+ ubifs_msg(c, "saved lprops statistics dump");
ubifs_dump_lstats(&d->saved_lst);
- ubifs_msg("saved budgeting info dump");
+ ubifs_msg(c, "saved budgeting info dump");
ubifs_dump_budg(c, &d->saved_bi);
- ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
- ubifs_msg("current lprops statistics dump");
+ ubifs_msg(c, "saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
+ ubifs_msg(c, "current lprops statistics dump");
ubifs_get_lp_stats(c, &lst);
ubifs_dump_lstats(&lst);
- ubifs_msg("current budgeting info dump");
+ ubifs_msg(c, "current budgeting info dump");
ubifs_dump_budg(c, &c->bi);
dump_stack();
return -EINVAL;
@@ -1077,9 +1077,9 @@ int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
mutex_lock(&ui->ui_mutex);
spin_lock(&ui->ui_lock);
if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
- ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode is clean",
+ ubifs_err(c, "ui_size is %lld, synced_i_size is %lld, but inode is clean",
ui->ui_size, ui->synced_i_size);
- ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
+ ubifs_err(c, "i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
inode->i_mode, i_size_read(inode));
dump_stack();
err = -EINVAL;
@@ -1140,7 +1140,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
kfree(pdent);
if (i_size_read(dir) != size) {
- ubifs_err("directory inode %lu has size %llu, but calculated size is %llu",
+ ubifs_err(c, "directory inode %lu has size %llu, but calculated size is %llu",
dir->i_ino, (unsigned long long)i_size_read(dir),
(unsigned long long)size);
ubifs_dump_inode(c, dir);
@@ -1148,7 +1148,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
return -EINVAL;
}
if (dir->i_nlink != nlink) {
- ubifs_err("directory inode %lu has nlink %u, but calculated nlink is %u",
+ ubifs_err(c, "directory inode %lu has nlink %u, but calculated nlink is %u",
dir->i_ino, dir->i_nlink, nlink);
ubifs_dump_inode(c, dir);
dump_stack();
@@ -1207,10 +1207,10 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
err = 1;
key_read(c, &dent1->key, &key);
if (keys_cmp(c, &zbr1->key, &key)) {
- ubifs_err("1st entry at %d:%d has key %s", zbr1->lnum,
+ ubifs_err(c, "1st entry at %d:%d has key %s", zbr1->lnum,
zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
DBG_KEY_BUF_LEN));
- ubifs_err("but it should have key %s according to tnc",
+ ubifs_err(c, "but it should have key %s according to tnc",
dbg_snprintf_key(c, &zbr1->key, key_buf,
DBG_KEY_BUF_LEN));
ubifs_dump_node(c, dent1);
@@ -1219,10 +1219,10 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
key_read(c, &dent2->key, &key);
if (keys_cmp(c, &zbr2->key, &key)) {
- ubifs_err("2nd entry at %d:%d has key %s", zbr1->lnum,
+ ubifs_err(c, "2nd entry at %d:%d has key %s", zbr1->lnum,
zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
DBG_KEY_BUF_LEN));
- ubifs_err("but it should have key %s according to tnc",
+ ubifs_err(c, "but it should have key %s according to tnc",
dbg_snprintf_key(c, &zbr2->key, key_buf,
DBG_KEY_BUF_LEN));
ubifs_dump_node(c, dent2);
@@ -1238,14 +1238,14 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
goto out_free;
}
if (cmp == 0 && nlen1 == nlen2)
- ubifs_err("2 xent/dent nodes with the same name");
+ ubifs_err(c, "2 xent/dent nodes with the same name");
else
- ubifs_err("bad order of colliding key %s",
+ ubifs_err(c, "bad order of colliding key %s",
dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
- ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
+ ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs);
ubifs_dump_node(c, dent1);
- ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
+ ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs);
ubifs_dump_node(c, dent2);
out_free:
@@ -1447,11 +1447,11 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
return 0;
out:
- ubifs_err("failed, error %d", err);
- ubifs_msg("dump of the znode");
+ ubifs_err(c, "failed, error %d", err);
+ ubifs_msg(c, "dump of the znode");
ubifs_dump_znode(c, znode);
if (zp) {
- ubifs_msg("dump of the parent znode");
+ ubifs_msg(c, "dump of the parent znode");
ubifs_dump_znode(c, zp);
}
dump_stack();
@@ -1518,9 +1518,9 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
if (err < 0)
return err;
if (err) {
- ubifs_msg("first znode");
+ ubifs_msg(c, "first znode");
ubifs_dump_znode(c, prev);
- ubifs_msg("second znode");
+ ubifs_msg(c, "second znode");
ubifs_dump_znode(c, znode);
return -EINVAL;
}
@@ -1529,13 +1529,13 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
if (extra) {
if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) {
- ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld",
+ ubifs_err(c, "incorrect clean_zn_cnt %ld, calculated %ld",
atomic_long_read(&c->clean_zn_cnt),
clean_cnt);
return -EINVAL;
}
if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) {
- ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld",
+ ubifs_err(c, "incorrect dirty_zn_cnt %ld, calculated %ld",
atomic_long_read(&c->dirty_zn_cnt),
dirty_cnt);
return -EINVAL;
@@ -1608,7 +1608,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
if (znode_cb) {
err = znode_cb(c, znode, priv);
if (err) {
- ubifs_err("znode checking function returned error %d",
+ ubifs_err(c, "znode checking function returned error %d",
err);
ubifs_dump_znode(c, znode);
goto out_dump;
@@ -1619,7 +1619,7 @@ int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
zbr = &znode->zbranch[idx];
err = leaf_cb(c, zbr, priv);
if (err) {
- ubifs_err("leaf checking function returned error %d, for leaf at LEB %d:%d",
+ ubifs_err(c, "leaf checking function returned error %d, for leaf at LEB %d:%d",
err, zbr->lnum, zbr->offs);
goto out_dump;
}
@@ -1675,7 +1675,7 @@ out_dump:
zbr = &znode->parent->zbranch[znode->iip];
else
zbr = &c->zroot;
- ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs);
+ ubifs_msg(c, "dump of znode at LEB %d:%d", zbr->lnum, zbr->offs);
ubifs_dump_znode(c, znode);
out_unlock:
mutex_unlock(&c->tnc_mutex);
@@ -1722,12 +1722,12 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
err = dbg_walk_index(c, NULL, add_size, &calc);
if (err) {
- ubifs_err("error %d while walking the index", err);
+ ubifs_err(c, "error %d while walking the index", err);
return err;
}
if (calc != idx_size) {
- ubifs_err("index size check failed: calculated size is %lld, should be %lld",
+ ubifs_err(c, "index size check failed: calculated size is %lld, should be %lld",
calc, idx_size);
dump_stack();
return -EINVAL;
@@ -1814,7 +1814,7 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
}
if (inum > c->highest_inum) {
- ubifs_err("too high inode number, max. is %lu",
+ ubifs_err(c, "too high inode number, max. is %lu",
(unsigned long)c->highest_inum);
return ERR_PTR(-EINVAL);
}
@@ -1921,17 +1921,17 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
ino_key_init(c, &key, inum);
err = ubifs_lookup_level0(c, &key, &znode, &n);
if (!err) {
- ubifs_err("inode %lu not found in index", (unsigned long)inum);
+ ubifs_err(c, "inode %lu not found in index", (unsigned long)inum);
return ERR_PTR(-ENOENT);
} else if (err < 0) {
- ubifs_err("error %d while looking up inode %lu",
+ ubifs_err(c, "error %d while looking up inode %lu",
err, (unsigned long)inum);
return ERR_PTR(err);
}
zbr = &znode->zbranch[n];
if (zbr->len < UBIFS_INO_NODE_SZ) {
- ubifs_err("bad node %lu node length %d",
+ ubifs_err(c, "bad node %lu node length %d",
(unsigned long)inum, zbr->len);
return ERR_PTR(-EINVAL);
}
@@ -1942,7 +1942,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
err = ubifs_tnc_read_node(c, zbr, ino);
if (err) {
- ubifs_err("cannot read inode node at LEB %d:%d, error %d",
+ ubifs_err(c, "cannot read inode node at LEB %d:%d, error %d",
zbr->lnum, zbr->offs, err);
kfree(ino);
return ERR_PTR(err);
@@ -1951,7 +1951,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
fscki = add_inode(c, fsckd, ino);
kfree(ino);
if (IS_ERR(fscki)) {
- ubifs_err("error %ld while adding inode %lu node",
+ ubifs_err(c, "error %ld while adding inode %lu node",
PTR_ERR(fscki), (unsigned long)inum);
return fscki;
}
@@ -1985,7 +1985,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
struct fsck_inode *fscki;
if (zbr->len < UBIFS_CH_SZ) {
- ubifs_err("bad leaf length %d (LEB %d:%d)",
+ ubifs_err(c, "bad leaf length %d (LEB %d:%d)",
zbr->len, zbr->lnum, zbr->offs);
return -EINVAL;
}
@@ -1996,7 +1996,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
err = ubifs_tnc_read_node(c, zbr, node);
if (err) {
- ubifs_err("cannot read leaf node at LEB %d:%d, error %d",
+ ubifs_err(c, "cannot read leaf node at LEB %d:%d, error %d",
zbr->lnum, zbr->offs, err);
goto out_free;
}
@@ -2006,7 +2006,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
fscki = add_inode(c, priv, node);
if (IS_ERR(fscki)) {
err = PTR_ERR(fscki);
- ubifs_err("error %d while adding inode node", err);
+ ubifs_err(c, "error %d while adding inode node", err);
goto out_dump;
}
goto out;
@@ -2014,7 +2014,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY &&
type != UBIFS_DATA_KEY) {
- ubifs_err("unexpected node type %d at LEB %d:%d",
+ ubifs_err(c, "unexpected node type %d at LEB %d:%d",
type, zbr->lnum, zbr->offs);
err = -EINVAL;
goto out_free;
@@ -2022,7 +2022,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
ch = node;
if (le64_to_cpu(ch->sqnum) > c->max_sqnum) {
- ubifs_err("too high sequence number, max. is %llu",
+ ubifs_err(c, "too high sequence number, max. is %llu",
c->max_sqnum);
err = -EINVAL;
goto out_dump;
@@ -2042,7 +2042,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
fscki = read_add_inode(c, priv, inum);
if (IS_ERR(fscki)) {
err = PTR_ERR(fscki);
- ubifs_err("error %d while processing data node and trying to find inode node %lu",
+ ubifs_err(c, "error %d while processing data node and trying to find inode node %lu",
err, (unsigned long)inum);
goto out_dump;
}
@@ -2052,7 +2052,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
blk_offs <<= UBIFS_BLOCK_SHIFT;
blk_offs += le32_to_cpu(dn->size);
if (blk_offs > fscki->size) {
- ubifs_err("data node at LEB %d:%d is not within inode size %lld",
+ ubifs_err(c, "data node at LEB %d:%d is not within inode size %lld",
zbr->lnum, zbr->offs, fscki->size);
err = -EINVAL;
goto out_dump;
@@ -2076,7 +2076,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
fscki = read_add_inode(c, priv, inum);
if (IS_ERR(fscki)) {
err = PTR_ERR(fscki);
- ubifs_err("error %d while processing entry node and trying to find inode node %lu",
+ ubifs_err(c, "error %d while processing entry node and trying to find inode node %lu",
err, (unsigned long)inum);
goto out_dump;
}
@@ -2088,7 +2088,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
fscki1 = read_add_inode(c, priv, inum);
if (IS_ERR(fscki1)) {
err = PTR_ERR(fscki1);
- ubifs_err("error %d while processing entry node and trying to find parent inode node %lu",
+ ubifs_err(c, "error %d while processing entry node and trying to find parent inode node %lu",
err, (unsigned long)inum);
goto out_dump;
}
@@ -2111,7 +2111,7 @@ out:
return 0;
out_dump:
- ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
+ ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
ubifs_dump_node(c, node);
out_free:
kfree(node);
@@ -2162,52 +2162,52 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
*/
if (fscki->inum != UBIFS_ROOT_INO &&
fscki->references != 1) {
- ubifs_err("directory inode %lu has %d direntries which refer it, but should be 1",
+ ubifs_err(c, "directory inode %lu has %d direntries which refer it, but should be 1",
(unsigned long)fscki->inum,
fscki->references);
goto out_dump;
}
if (fscki->inum == UBIFS_ROOT_INO &&
fscki->references != 0) {
- ubifs_err("root inode %lu has non-zero (%d) direntries which refer it",
+ ubifs_err(c, "root inode %lu has non-zero (%d) direntries which refer it",
(unsigned long)fscki->inum,
fscki->references);
goto out_dump;
}
if (fscki->calc_sz != fscki->size) {
- ubifs_err("directory inode %lu size is %lld, but calculated size is %lld",
+ ubifs_err(c, "directory inode %lu size is %lld, but calculated size is %lld",
(unsigned long)fscki->inum,
fscki->size, fscki->calc_sz);
goto out_dump;
}
if (fscki->calc_cnt != fscki->nlink) {
- ubifs_err("directory inode %lu nlink is %d, but calculated nlink is %d",
+ ubifs_err(c, "directory inode %lu nlink is %d, but calculated nlink is %d",
(unsigned long)fscki->inum,
fscki->nlink, fscki->calc_cnt);
goto out_dump;
}
} else {
if (fscki->references != fscki->nlink) {
- ubifs_err("inode %lu nlink is %d, but calculated nlink is %d",
+ ubifs_err(c, "inode %lu nlink is %d, but calculated nlink is %d",
(unsigned long)fscki->inum,
fscki->nlink, fscki->references);
goto out_dump;
}
}
if (fscki->xattr_sz != fscki->calc_xsz) {
- ubifs_err("inode %lu has xattr size %u, but calculated size is %lld",
+ ubifs_err(c, "inode %lu has xattr size %u, but calculated size is %lld",
(unsigned long)fscki->inum, fscki->xattr_sz,
fscki->calc_xsz);
goto out_dump;
}
if (fscki->xattr_cnt != fscki->calc_xcnt) {
- ubifs_err("inode %lu has %u xattrs, but calculated count is %lld",
+ ubifs_err(c, "inode %lu has %u xattrs, but calculated count is %lld",
(unsigned long)fscki->inum,
fscki->xattr_cnt, fscki->calc_xcnt);
goto out_dump;
}
if (fscki->xattr_nms != fscki->calc_xnms) {
- ubifs_err("inode %lu has xattr names' size %u, but calculated names' size is %lld",
+ ubifs_err(c, "inode %lu has xattr names' size %u, but calculated names' size is %lld",
(unsigned long)fscki->inum, fscki->xattr_nms,
fscki->calc_xnms);
goto out_dump;
@@ -2221,11 +2221,11 @@ out_dump:
ino_key_init(c, &key, fscki->inum);
err = ubifs_lookup_level0(c, &key, &znode, &n);
if (!err) {
- ubifs_err("inode %lu not found in index",
+ ubifs_err(c, "inode %lu not found in index",
(unsigned long)fscki->inum);
return -ENOENT;
} else if (err < 0) {
- ubifs_err("error %d while looking up inode %lu",
+ ubifs_err(c, "error %d while looking up inode %lu",
err, (unsigned long)fscki->inum);
return err;
}
@@ -2237,13 +2237,13 @@ out_dump:
err = ubifs_tnc_read_node(c, zbr, ino);
if (err) {
- ubifs_err("cannot read inode node at LEB %d:%d, error %d",
+ ubifs_err(c, "cannot read inode node at LEB %d:%d, error %d",
zbr->lnum, zbr->offs, err);
kfree(ino);
return err;
}
- ubifs_msg("dump of the inode %lu sitting in LEB %d:%d",
+ ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d",
(unsigned long)fscki->inum, zbr->lnum, zbr->offs);
ubifs_dump_node(c, ino);
kfree(ino);
@@ -2284,7 +2284,7 @@ int dbg_check_filesystem(struct ubifs_info *c)
return 0;
out_free:
- ubifs_err("file-system check failed with error %d", err);
+ ubifs_err(c, "file-system check failed with error %d", err);
dump_stack();
free_inodes(&fsckd);
return err;
@@ -2315,12 +2315,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
sb = container_of(cur->next, struct ubifs_scan_node, list);
if (sa->type != UBIFS_DATA_NODE) {
- ubifs_err("bad node type %d", sa->type);
+ ubifs_err(c, "bad node type %d", sa->type);
ubifs_dump_node(c, sa->node);
return -EINVAL;
}
if (sb->type != UBIFS_DATA_NODE) {
- ubifs_err("bad node type %d", sb->type);
+ ubifs_err(c, "bad node type %d", sb->type);
ubifs_dump_node(c, sb->node);
return -EINVAL;
}
@@ -2331,7 +2331,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
if (inuma < inumb)
continue;
if (inuma > inumb) {
- ubifs_err("larger inum %lu goes before inum %lu",
+ ubifs_err(c, "larger inum %lu goes before inum %lu",
(unsigned long)inuma, (unsigned long)inumb);
goto error_dump;
}
@@ -2340,11 +2340,11 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
blkb = key_block(c, &sb->key);
if (blka > blkb) {
- ubifs_err("larger block %u goes before %u", blka, blkb);
+ ubifs_err(c, "larger block %u goes before %u", blka, blkb);
goto error_dump;
}
if (blka == blkb) {
- ubifs_err("two data nodes for the same block");
+ ubifs_err(c, "two data nodes for the same block");
goto error_dump;
}
}
@@ -2383,19 +2383,19 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
sa->type != UBIFS_XENT_NODE) {
- ubifs_err("bad node type %d", sa->type);
+ ubifs_err(c, "bad node type %d", sa->type);
ubifs_dump_node(c, sa->node);
return -EINVAL;
}
if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
sa->type != UBIFS_XENT_NODE) {
- ubifs_err("bad node type %d", sb->type);
+ ubifs_err(c, "bad node type %d", sb->type);
ubifs_dump_node(c, sb->node);
return -EINVAL;
}
if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
- ubifs_err("non-inode node goes before inode node");
+ ubifs_err(c, "non-inode node goes before inode node");
goto error_dump;
}
@@ -2405,7 +2405,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
/* Inode nodes are sorted in descending size order */
if (sa->len < sb->len) {
- ubifs_err("smaller inode node goes first");
+ ubifs_err(c, "smaller inode node goes first");
goto error_dump;
}
continue;
@@ -2421,7 +2421,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
if (inuma < inumb)
continue;
if (inuma > inumb) {
- ubifs_err("larger inum %lu goes before inum %lu",
+ ubifs_err(c, "larger inum %lu goes before inum %lu",
(unsigned long)inuma, (unsigned long)inumb);
goto error_dump;
}
@@ -2430,7 +2430,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
hashb = key_block(c, &sb->key);
if (hasha > hashb) {
- ubifs_err("larger hash %u goes before %u",
+ ubifs_err(c, "larger hash %u goes before %u",
hasha, hashb);
goto error_dump;
}
@@ -2439,9 +2439,9 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
return 0;
error_dump:
- ubifs_msg("dumping first node");
+ ubifs_msg(c, "dumping first node");
ubifs_dump_node(c, sa->node);
- ubifs_msg("dumping second node");
+ ubifs_msg(c, "dumping second node");
ubifs_dump_node(c, sb->node);
return -EINVAL;
return 0;
@@ -2470,13 +2470,13 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
delay = prandom_u32() % 60000;
d->pc_timeout = jiffies;
d->pc_timeout += msecs_to_jiffies(delay);
- ubifs_warn("failing after %lums", delay);
+ ubifs_warn(c, "failing after %lums", delay);
} else {
d->pc_delay = 2;
delay = prandom_u32() % 10000;
/* Fail within 10000 operations */
d->pc_cnt_max = delay;
- ubifs_warn("failing after %lu calls", delay);
+ ubifs_warn(c, "failing after %lu calls", delay);
}
}
@@ -2494,55 +2494,55 @@ static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
return 0;
if (chance(19, 20))
return 0;
- ubifs_warn("failing in super block LEB %d", lnum);
+ ubifs_warn(c, "failing in super block LEB %d", lnum);
} else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
if (chance(19, 20))
return 0;
- ubifs_warn("failing in master LEB %d", lnum);
+ ubifs_warn(c, "failing in master LEB %d", lnum);
} else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
if (write && chance(99, 100))
return 0;
if (chance(399, 400))
return 0;
- ubifs_warn("failing in log LEB %d", lnum);
+ ubifs_warn(c, "failing in log LEB %d", lnum);
} else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
if (write && chance(7, 8))
return 0;
if (chance(19, 20))
return 0;
- ubifs_warn("failing in LPT LEB %d", lnum);
+ ubifs_warn(c, "failing in LPT LEB %d", lnum);
} else if (lnum >= c->orph_first && lnum <= c->orph_last) {
if (write && chance(1, 2))
return 0;
if (chance(9, 10))
return 0;
- ubifs_warn("failing in orphan LEB %d", lnum);
+ ubifs_warn(c, "failing in orphan LEB %d", lnum);
} else if (lnum == c->ihead_lnum) {
if (chance(99, 100))
return 0;
- ubifs_warn("failing in index head LEB %d", lnum);
+ ubifs_warn(c, "failing in index head LEB %d", lnum);
} else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
if (chance(9, 10))
return 0;
- ubifs_warn("failing in GC head LEB %d", lnum);
+ ubifs_warn(c, "failing in GC head LEB %d", lnum);
} else if (write && !RB_EMPTY_ROOT(&c->buds) &&
!ubifs_search_bud(c, lnum)) {
if (chance(19, 20))
return 0;
- ubifs_warn("failing in non-bud LEB %d", lnum);
+ ubifs_warn(c, "failing in non-bud LEB %d", lnum);
} else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
c->cmt_state == COMMIT_RUNNING_REQUIRED) {
if (chance(999, 1000))
return 0;
- ubifs_warn("failing in bud LEB %d commit running", lnum);
+ ubifs_warn(c, "failing in bud LEB %d commit running", lnum);
} else {
if (chance(9999, 10000))
return 0;
- ubifs_warn("failing in bud LEB %d commit not running", lnum);
+ ubifs_warn(c, "failing in bud LEB %d commit not running", lnum);
}
d->pc_happened = 1;
- ubifs_warn("========== Power cut emulated ==========");
+ ubifs_warn(c, "========== Power cut emulated ==========");
dump_stack();
return 1;
}
@@ -2557,7 +2557,7 @@ static int corrupt_data(const struct ubifs_info *c, const void *buf,
/* Corruption span max to end of write unit */
to = min(len, ALIGN(from + 1, c->max_write_size));
- ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
+ ubifs_warn(c, "filled bytes %u-%u with %s", from, to - 1,
ffs ? "0xFFs" : "random data");
if (ffs)
@@ -2579,7 +2579,7 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
failing = power_cut_emulated(c, lnum, 1);
if (failing) {
len = corrupt_data(c, buf, len);
- ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
+ ubifs_warn(c, "actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
len, lnum, offs);
}
err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
@@ -2909,7 +2909,7 @@ out_remove:
debugfs_remove_recursive(d->dfs_dir);
out:
err = dent ? PTR_ERR(dent) : -ENODEV;
- ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
+ ubifs_err(c, "cannot create \"%s\" debugfs file or directory, error %d\n",
fname, err);
return err;
}
@@ -3063,8 +3063,8 @@ out_remove:
debugfs_remove_recursive(dfs_rootdir);
out:
err = dent ? PTR_ERR(dent) : -ENODEV;
- ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
- fname, err);
+ pr_err("UBIFS error (pid %d): cannot create \"%s\" debugfs file or directory, error %d\n",
+ current->pid, fname, err);
return err;
}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 0fa6c803992e..02d1ee778df0 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -146,12 +146,12 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
if (c->highest_inum >= INUM_WARN_WATERMARK) {
if (c->highest_inum >= INUM_WATERMARK) {
spin_unlock(&c->cnt_lock);
- ubifs_err("out of inode numbers");
+ ubifs_err(c, "out of inode numbers");
make_bad_inode(inode);
iput(inode);
return ERR_PTR(-EINVAL);
}
- ubifs_warn("running out of inode numbers (current %lu, max %d)",
+ ubifs_warn(c, "running out of inode numbers (current %lu, max %u)",
(unsigned long)c->highest_inum, INUM_WATERMARK);
}
@@ -222,7 +222,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
* checking.
*/
err = PTR_ERR(inode);
- ubifs_err("dead directory entry '%pd', error %d",
+ ubifs_err(c, "dead directory entry '%pd', error %d",
dentry, err);
ubifs_ro_mode(c, err);
goto out;
@@ -272,7 +272,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
err = ubifs_init_security(dir, inode, &dentry->d_name);
if (err)
- goto out_cancel;
+ goto out_inode;
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
@@ -292,11 +292,12 @@ out_cancel:
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
+out_inode:
make_bad_inode(inode);
iput(inode);
out_budg:
ubifs_release_budget(c, &req);
- ubifs_err("cannot create regular file, error %d", err);
+ ubifs_err(c, "cannot create regular file, error %d", err);
return err;
}
@@ -449,7 +450,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
out:
if (err != -ENOENT) {
- ubifs_err("cannot find next direntry, error %d", err);
+ ubifs_err(c, "cannot find next direntry, error %d", err);
return err;
}
@@ -732,7 +733,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
err = ubifs_init_security(dir, inode, &dentry->d_name);
if (err)
- goto out_cancel;
+ goto out_inode;
mutex_lock(&dir_ui->ui_mutex);
insert_inode_hash(inode);
@@ -743,7 +744,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
dir->i_mtime = dir->i_ctime = inode->i_ctime;
err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
if (err) {
- ubifs_err("cannot create directory, error %d", err);
+ ubifs_err(c, "cannot create directory, error %d", err);
goto out_cancel;
}
mutex_unlock(&dir_ui->ui_mutex);
@@ -757,6 +758,7 @@ out_cancel:
dir_ui->ui_size = dir->i_size;
drop_nlink(dir);
mutex_unlock(&dir_ui->ui_mutex);
+out_inode:
make_bad_inode(inode);
iput(inode);
out_budg:
@@ -816,7 +818,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
err = ubifs_init_security(dir, inode, &dentry->d_name);
if (err)
- goto out_cancel;
+ goto out_inode;
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
@@ -836,6 +838,7 @@ out_cancel:
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
+out_inode:
make_bad_inode(inode);
iput(inode);
out_budg:
@@ -896,7 +899,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
err = ubifs_init_security(dir, inode, &dentry->d_name);
if (err)
- goto out_cancel;
+ goto out_inode;
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e627c0acf626..3ba3fef64e9e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,7 +50,6 @@
*/
#include "ubifs.h"
-#include <linux/aio.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/slab.h>
@@ -80,7 +79,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
out_len = UBIFS_BLOCK_SIZE;
- err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
+ err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
le16_to_cpu(dn->compr_type));
if (err || len != out_len)
goto dump;
@@ -96,7 +95,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
return 0;
dump:
- ubifs_err("bad data node (block %u, inode %lu)",
+ ubifs_err(c, "bad data node (block %u, inode %lu)",
block, inode->i_ino);
ubifs_dump_node(c, dn);
return -EINVAL;
@@ -161,13 +160,14 @@ static int do_readpage(struct page *page)
addr += UBIFS_BLOCK_SIZE;
}
if (err) {
+ struct ubifs_info *c = inode->i_sb->s_fs_info;
if (err == -ENOENT) {
/* Not found, so it must be a hole */
SetPageChecked(page);
dbg_gen("hole");
goto out_free;
}
- ubifs_err("cannot read page %lu of inode %lu, error %d",
+ ubifs_err(c, "cannot read page %lu of inode %lu, error %d",
page->index, inode->i_ino, err);
goto error;
}
@@ -650,7 +650,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
out_len = UBIFS_BLOCK_SIZE;
- err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
+ err = ubifs_decompress(c, &dn->data, dlen, addr, &out_len,
le16_to_cpu(dn->compr_type));
if (err || len != out_len)
goto out_err;
@@ -698,7 +698,7 @@ out_err:
SetPageError(page);
flush_dcache_page(page);
kunmap(page);
- ubifs_err("bad data node (block %u, inode %lu)",
+ ubifs_err(c, "bad data node (block %u, inode %lu)",
page_block, inode->i_ino);
return -EINVAL;
}
@@ -802,7 +802,7 @@ out_free:
return ret;
out_warn:
- ubifs_warn("ignoring error %d and skipping bulk-read", err);
+ ubifs_warn(c, "ignoring error %d and skipping bulk-read", err);
goto out_free;
out_bu_off:
@@ -930,7 +930,7 @@ static int do_writepage(struct page *page, int len)
}
if (err) {
SetPageError(page);
- ubifs_err("cannot write page %lu of inode %lu, error %d",
+ ubifs_err(c, "cannot write page %lu of inode %lu, error %d",
page->index, inode->i_ino, err);
ubifs_ro_mode(c, err);
}
@@ -1485,7 +1485,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
err = ubifs_budget_space(c, &req);
if (unlikely(err)) {
if (err == -ENOSPC)
- ubifs_warn("out of space for mmapped file (inode number %lu)",
+ ubifs_warn(c, "out of space for mmapped file (inode number %lu)",
inode->i_ino);
return VM_FAULT_SIGBUS;
}
@@ -1581,8 +1581,6 @@ const struct inode_operations ubifs_symlink_inode_operations = {
const struct file_operations ubifs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = generic_file_read_iter,
.write_iter = ubifs_write_iter,
.mmap = ubifs_file_mmap,
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index fb08b0c514b6..97be41215332 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -85,7 +85,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
c->ro_error = 1;
c->no_chk_data_crc = 0;
c->vfs_sb->s_flags |= MS_RDONLY;
- ubifs_warn("switched to read-only mode, error %d", err);
+ ubifs_warn(c, "switched to read-only mode, error %d", err);
dump_stack();
}
}
@@ -107,7 +107,7 @@ int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
* @even_ebadmsg is true.
*/
if (err && (err != -EBADMSG || even_ebadmsg)) {
- ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
+ ubifs_err(c, "reading %d bytes from LEB %d:%d failed, error %d",
len, lnum, offs, err);
dump_stack();
}
@@ -127,7 +127,7 @@ int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
else
err = dbg_leb_write(c, lnum, buf, offs, len);
if (err) {
- ubifs_err("writing %d bytes to LEB %d:%d failed, error %d",
+ ubifs_err(c, "writing %d bytes to LEB %d:%d failed, error %d",
len, lnum, offs, err);
ubifs_ro_mode(c, err);
dump_stack();
@@ -147,7 +147,7 @@ int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len)
else
err = dbg_leb_change(c, lnum, buf, len);
if (err) {
- ubifs_err("changing %d bytes in LEB %d failed, error %d",
+ ubifs_err(c, "changing %d bytes in LEB %d failed, error %d",
len, lnum, err);
ubifs_ro_mode(c, err);
dump_stack();
@@ -167,7 +167,7 @@ int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
else
err = dbg_leb_unmap(c, lnum);
if (err) {
- ubifs_err("unmap LEB %d failed, error %d", lnum, err);
+ ubifs_err(c, "unmap LEB %d failed, error %d", lnum, err);
ubifs_ro_mode(c, err);
dump_stack();
}
@@ -186,7 +186,7 @@ int ubifs_leb_map(struct ubifs_info *c, int lnum)
else
err = dbg_leb_map(c, lnum);
if (err) {
- ubifs_err("mapping LEB %d failed, error %d", lnum, err);
+ ubifs_err(c, "mapping LEB %d failed, error %d", lnum, err);
ubifs_ro_mode(c, err);
dump_stack();
}
@@ -199,7 +199,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
err = ubi_is_mapped(c->ubi, lnum);
if (err < 0) {
- ubifs_err("ubi_is_mapped failed for LEB %d, error %d",
+ ubifs_err(c, "ubi_is_mapped failed for LEB %d, error %d",
lnum, err);
dump_stack();
}
@@ -247,7 +247,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
magic = le32_to_cpu(ch->magic);
if (magic != UBIFS_NODE_MAGIC) {
if (!quiet)
- ubifs_err("bad magic %#08x, expected %#08x",
+ ubifs_err(c, "bad magic %#08x, expected %#08x",
magic, UBIFS_NODE_MAGIC);
err = -EUCLEAN;
goto out;
@@ -256,7 +256,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
type = ch->node_type;
if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
if (!quiet)
- ubifs_err("bad node type %d", type);
+ ubifs_err(c, "bad node type %d", type);
goto out;
}
@@ -279,7 +279,7 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
node_crc = le32_to_cpu(ch->crc);
if (crc != node_crc) {
if (!quiet)
- ubifs_err("bad CRC: calculated %#08x, read %#08x",
+ ubifs_err(c, "bad CRC: calculated %#08x, read %#08x",
crc, node_crc);
err = -EUCLEAN;
goto out;
@@ -289,10 +289,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
out_len:
if (!quiet)
- ubifs_err("bad node length %d", node_len);
+ ubifs_err(c, "bad node length %d", node_len);
out:
if (!quiet) {
- ubifs_err("bad node at LEB %d:%d", lnum, offs);
+ ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
ubifs_dump_node(c, buf);
dump_stack();
}
@@ -355,11 +355,11 @@ static unsigned long long next_sqnum(struct ubifs_info *c)
if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
if (sqnum >= SQNUM_WATERMARK) {
- ubifs_err("sequence number overflow %llu, end of life",
+ ubifs_err(c, "sequence number overflow %llu, end of life",
sqnum);
ubifs_ro_mode(c, -EINVAL);
}
- ubifs_warn("running out of sequence numbers, end of life soon");
+ ubifs_warn(c, "running out of sequence numbers, end of life soon");
}
return sqnum;
@@ -636,7 +636,7 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
err = ubifs_wbuf_sync_nolock(wbuf);
mutex_unlock(&wbuf->io_mutex);
if (err) {
- ubifs_err("cannot sync write-buffer, error %d", err);
+ ubifs_err(c, "cannot sync write-buffer, error %d", err);
ubifs_ro_mode(c, err);
goto out_timers;
}
@@ -833,7 +833,7 @@ exit:
return 0;
out:
- ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
+ ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d",
len, wbuf->lnum, wbuf->offs, err);
ubifs_dump_node(c, buf);
dump_stack();
@@ -932,27 +932,27 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
}
if (type != ch->node_type) {
- ubifs_err("bad node type (%d but expected %d)",
+ ubifs_err(c, "bad node type (%d but expected %d)",
ch->node_type, type);
goto out;
}
err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
if (err) {
- ubifs_err("expected node type %d", type);
+ ubifs_err(c, "expected node type %d", type);
return err;
}
rlen = le32_to_cpu(ch->len);
if (rlen != len) {
- ubifs_err("bad node length %d, expected %d", rlen, len);
+ ubifs_err(c, "bad node length %d, expected %d", rlen, len);
goto out;
}
return 0;
out:
- ubifs_err("bad node at LEB %d:%d", lnum, offs);
+ ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
ubifs_dump_node(c, buf);
dump_stack();
return -EINVAL;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 648b143606cc..3c7b29de0ca7 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -138,7 +138,7 @@ static int setflags(struct inode *inode, int flags)
return err;
out_unlock:
- ubifs_err("can't modify inode %lu attributes", inode->i_ino);
+ ubifs_err(c, "can't modify inode %lu attributes", inode->i_ino);
mutex_unlock(&ui->ui_mutex);
ubifs_release_budget(c, &req);
return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index f6ac3f29323c..90ae1a8439d9 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -363,11 +363,11 @@ again:
* This should not happen unless the journal size limitations
* are too tough.
*/
- ubifs_err("stuck in space allocation");
+ ubifs_err(c, "stuck in space allocation");
err = -ENOSPC;
goto out;
} else if (cmt_retries > 32)
- ubifs_warn("too many space allocation re-tries (%d)",
+ ubifs_warn(c, "too many space allocation re-tries (%d)",
cmt_retries);
dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
@@ -380,7 +380,7 @@ again:
goto again;
out:
- ubifs_err("cannot reserve %d bytes in jhead %d, error %d",
+ ubifs_err(c, "cannot reserve %d bytes in jhead %d, error %d",
len, jhead, err);
if (err == -ENOSPC) {
/* This are some budgeting problems, print useful information */
@@ -731,7 +731,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
compr_type = ui->compr_type;
out_len = dlen - UBIFS_DATA_NODE_SZ;
- ubifs_compress(buf, len, &data->data, &out_len, &compr_type);
+ ubifs_compress(c, buf, len, &data->data, &out_len, &compr_type);
ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
dlen = UBIFS_DATA_NODE_SZ + out_len;
@@ -1100,7 +1100,8 @@ out_free:
* This function is used when an inode is truncated and the last data node of
* the inode has to be re-compressed and re-written.
*/
-static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
+static int recomp_data_node(const struct ubifs_info *c,
+ struct ubifs_data_node *dn, int *new_len)
{
void *buf;
int err, len, compr_type, out_len;
@@ -1112,11 +1113,11 @@ static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
compr_type = le16_to_cpu(dn->compr_type);
- err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type);
+ err = ubifs_decompress(c, &dn->data, len, buf, &out_len, compr_type);
if (err)
goto out;
- ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type);
+ ubifs_compress(c, buf, *new_len, &dn->data, &out_len, &compr_type);
ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
dn->compr_type = cpu_to_le16(compr_type);
dn->size = cpu_to_le32(*new_len);
@@ -1191,7 +1192,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
int compr_type = le16_to_cpu(dn->compr_type);
if (compr_type != UBIFS_COMPR_NONE) {
- err = recomp_data_node(dn, &dlen);
+ err = recomp_data_node(c, dn, &dlen);
if (err)
goto out_free;
} else {
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index c14628fbeee2..8c795e6392b1 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -696,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
destroy_done_tree(&done_tree);
vfree(buf);
if (write_lnum == c->lhead_lnum) {
- ubifs_err("log is too full");
+ ubifs_err(c, "log is too full");
return -EINVAL;
}
/* Unmap remaining LEBs */
@@ -743,7 +743,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c)
bud_bytes += c->leb_size - bud->start;
if (c->bud_bytes != bud_bytes) {
- ubifs_err("bad bud_bytes %lld, calculated %lld",
+ ubifs_err(c, "bad bud_bytes %lld, calculated %lld",
c->bud_bytes, bud_bytes);
err = -EINVAL;
}
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 46190a7c42a6..a0011aa3a779 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -682,7 +682,7 @@ int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
out:
ubifs_release_lprops(c);
if (err)
- ubifs_err("cannot change properties of LEB %d, error %d",
+ ubifs_err(c, "cannot change properties of LEB %d, error %d",
lnum, err);
return err;
}
@@ -721,7 +721,7 @@ int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
out:
ubifs_release_lprops(c);
if (err)
- ubifs_err("cannot update properties of LEB %d, error %d",
+ ubifs_err(c, "cannot update properties of LEB %d, error %d",
lnum, err);
return err;
}
@@ -746,7 +746,7 @@ int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
lpp = ubifs_lpt_lookup(c, lnum);
if (IS_ERR(lpp)) {
err = PTR_ERR(lpp);
- ubifs_err("cannot read properties of LEB %d, error %d",
+ ubifs_err(c, "cannot read properties of LEB %d, error %d",
lnum, err);
goto out;
}
@@ -873,13 +873,13 @@ int dbg_check_cats(struct ubifs_info *c)
list_for_each_entry(lprops, &c->empty_list, list) {
if (lprops->free != c->leb_size) {
- ubifs_err("non-empty LEB %d on empty list (free %d dirty %d flags %d)",
+ ubifs_err(c, "non-empty LEB %d on empty list (free %d dirty %d flags %d)",
lprops->lnum, lprops->free, lprops->dirty,
lprops->flags);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
- ubifs_err("taken LEB %d on empty list (free %d dirty %d flags %d)",
+ ubifs_err(c, "taken LEB %d on empty list (free %d dirty %d flags %d)",
lprops->lnum, lprops->free, lprops->dirty,
lprops->flags);
return -EINVAL;
@@ -889,13 +889,13 @@ int dbg_check_cats(struct ubifs_info *c)
i = 0;
list_for_each_entry(lprops, &c->freeable_list, list) {
if (lprops->free + lprops->dirty != c->leb_size) {
- ubifs_err("non-freeable LEB %d on freeable list (free %d dirty %d flags %d)",
+ ubifs_err(c, "non-freeable LEB %d on freeable list (free %d dirty %d flags %d)",
lprops->lnum, lprops->free, lprops->dirty,
lprops->flags);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
- ubifs_err("taken LEB %d on freeable list (free %d dirty %d flags %d)",
+ ubifs_err(c, "taken LEB %d on freeable list (free %d dirty %d flags %d)",
lprops->lnum, lprops->free, lprops->dirty,
lprops->flags);
return -EINVAL;
@@ -903,7 +903,7 @@ int dbg_check_cats(struct ubifs_info *c)
i += 1;
}
if (i != c->freeable_cnt) {
- ubifs_err("freeable list count %d expected %d", i,
+ ubifs_err(c, "freeable list count %d expected %d", i,
c->freeable_cnt);
return -EINVAL;
}
@@ -912,26 +912,26 @@ int dbg_check_cats(struct ubifs_info *c)
list_for_each(pos, &c->idx_gc)
i += 1;
if (i != c->idx_gc_cnt) {
- ubifs_err("idx_gc list count %d expected %d", i,
+ ubifs_err(c, "idx_gc list count %d expected %d", i,
c->idx_gc_cnt);
return -EINVAL;
}
list_for_each_entry(lprops, &c->frdi_idx_list, list) {
if (lprops->free + lprops->dirty != c->leb_size) {
- ubifs_err("non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+ ubifs_err(c, "non-freeable LEB %d on frdi_idx list (free %d dirty %d flags %d)",
lprops->lnum, lprops->free, lprops->dirty,
lprops->flags);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
- ubifs_err("taken LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+ ubifs_err(c, "taken LEB %d on frdi_idx list (free %d dirty %d flags %d)",
lprops->lnum, lprops->free, lprops->dirty,
lprops->flags);
return -EINVAL;
}
if (!(lprops->flags & LPROPS_INDEX)) {
- ubifs_err("non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)",
+ ubifs_err(c, "non-index LEB %d on frdi_idx list (free %d dirty %d flags %d)",
lprops->lnum, lprops->free, lprops->dirty,
lprops->flags);
return -EINVAL;
@@ -944,15 +944,15 @@ int dbg_check_cats(struct ubifs_info *c)
for (i = 0; i < heap->cnt; i++) {
lprops = heap->arr[i];
if (!lprops) {
- ubifs_err("null ptr in LPT heap cat %d", cat);
+ ubifs_err(c, "null ptr in LPT heap cat %d", cat);
return -EINVAL;
}
if (lprops->hpos != i) {
- ubifs_err("bad ptr in LPT heap cat %d", cat);
+ ubifs_err(c, "bad ptr in LPT heap cat %d", cat);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
- ubifs_err("taken LEB in LPT heap cat %d", cat);
+ ubifs_err(c, "taken LEB in LPT heap cat %d", cat);
return -EINVAL;
}
}
@@ -988,7 +988,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
goto out;
}
if (lprops != lp) {
- ubifs_err("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
+ ubifs_err(c, "lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
(size_t)lprops, (size_t)lp, lprops->lnum,
lp->lnum);
err = 4;
@@ -1008,7 +1008,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
}
out:
if (err) {
- ubifs_err("failed cat %d hpos %d err %d", cat, i, err);
+ ubifs_err(c, "failed cat %d hpos %d err %d", cat, i, err);
dump_stack();
ubifs_dump_heap(c, heap, cat);
}
@@ -1039,7 +1039,7 @@ static int scan_check_cb(struct ubifs_info *c,
if (cat != LPROPS_UNCAT) {
cat = ubifs_categorize_lprops(c, lp);
if (cat != (lp->flags & LPROPS_CAT_MASK)) {
- ubifs_err("bad LEB category %d expected %d",
+ ubifs_err(c, "bad LEB category %d expected %d",
(lp->flags & LPROPS_CAT_MASK), cat);
return -EINVAL;
}
@@ -1074,7 +1074,7 @@ static int scan_check_cb(struct ubifs_info *c,
}
}
if (!found) {
- ubifs_err("bad LPT list (category %d)", cat);
+ ubifs_err(c, "bad LPT list (category %d)", cat);
return -EINVAL;
}
}
@@ -1086,7 +1086,7 @@ static int scan_check_cb(struct ubifs_info *c,
if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
lp != heap->arr[lp->hpos]) {
- ubifs_err("bad LPT heap (category %d)", cat);
+ ubifs_err(c, "bad LPT heap (category %d)", cat);
return -EINVAL;
}
}
@@ -1133,7 +1133,7 @@ static int scan_check_cb(struct ubifs_info *c,
is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0;
if (is_idx && snod->type != UBIFS_IDX_NODE) {
- ubifs_err("indexing node in data LEB %d:%d",
+ ubifs_err(c, "indexing node in data LEB %d:%d",
lnum, snod->offs);
goto out_destroy;
}
@@ -1159,7 +1159,7 @@ static int scan_check_cb(struct ubifs_info *c,
if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
dirty < 0) {
- ubifs_err("bad calculated accounting for LEB %d: free %d, dirty %d",
+ ubifs_err(c, "bad calculated accounting for LEB %d: free %d, dirty %d",
lnum, free, dirty);
goto out_destroy;
}
@@ -1206,13 +1206,13 @@ static int scan_check_cb(struct ubifs_info *c,
/* Free but not unmapped LEB, it's fine */
is_idx = 0;
else {
- ubifs_err("indexing node without indexing flag");
+ ubifs_err(c, "indexing node without indexing flag");
goto out_print;
}
}
if (!is_idx && (lp->flags & LPROPS_INDEX)) {
- ubifs_err("data node with indexing flag");
+ ubifs_err(c, "data node with indexing flag");
goto out_print;
}
@@ -1241,7 +1241,7 @@ static int scan_check_cb(struct ubifs_info *c,
return LPT_SCAN_CONTINUE;
out_print:
- ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d",
+ ubifs_err(c, "bad accounting of LEB %d: free %d, dirty %d flags %#x, should be free %d, dirty %d",
lnum, lp->free, lp->dirty, lp->flags, free, dirty);
ubifs_dump_leb(c, lnum);
out_destroy:
@@ -1293,11 +1293,11 @@ int dbg_check_lprops(struct ubifs_info *c)
lst.total_free != c->lst.total_free ||
lst.total_dirty != c->lst.total_dirty ||
lst.total_used != c->lst.total_used) {
- ubifs_err("bad overall accounting");
- ubifs_err("calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
+ ubifs_err(c, "bad overall accounting");
+ ubifs_err(c, "calculated: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
lst.empty_lebs, lst.idx_lebs, lst.total_free,
lst.total_dirty, lst.total_used);
- ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
+ ubifs_err(c, "read from lprops: empty_lebs %d, idx_lebs %d, total_free %lld, total_dirty %lld, total_used %lld",
c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
c->lst.total_dirty, c->lst.total_used);
err = -EINVAL;
@@ -1306,10 +1306,10 @@ int dbg_check_lprops(struct ubifs_info *c)
if (lst.total_dead != c->lst.total_dead ||
lst.total_dark != c->lst.total_dark) {
- ubifs_err("bad dead/dark space accounting");
- ubifs_err("calculated: total_dead %lld, total_dark %lld",
+ ubifs_err(c, "bad dead/dark space accounting");
+ ubifs_err(c, "calculated: total_dead %lld, total_dark %lld",
lst.total_dead, lst.total_dark);
- ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
+ ubifs_err(c, "read from lprops: total_dead %lld, total_dark %lld",
c->lst.total_dead, c->lst.total_dark);
err = -EINVAL;
goto out;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 421bd0a80424..dc9f27e9d61b 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -145,13 +145,13 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c)
sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size);
if (lebs_needed > c->lpt_lebs) {
- ubifs_err("too few LPT LEBs");
+ ubifs_err(c, "too few LPT LEBs");
return -EINVAL;
}
/* Verify that ltab fits in a single LEB (since ltab is a single node */
if (c->ltab_sz > c->leb_size) {
- ubifs_err("LPT ltab too big");
+ ubifs_err(c, "LPT ltab too big");
return -EINVAL;
}
@@ -213,7 +213,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
continue;
}
if (c->ltab_sz > c->leb_size) {
- ubifs_err("LPT ltab too big");
+ ubifs_err(c, "LPT ltab too big");
return -EINVAL;
}
*main_lebs = c->main_lebs;
@@ -911,7 +911,7 @@ static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode,
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int check_lpt_crc(void *buf, int len)
+static int check_lpt_crc(const struct ubifs_info *c, void *buf, int len)
{
int pos = 0;
uint8_t *addr = buf;
@@ -921,8 +921,8 @@ static int check_lpt_crc(void *buf, int len)
calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
len - UBIFS_LPT_CRC_BYTES);
if (crc != calc_crc) {
- ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc,
- calc_crc);
+ ubifs_err(c, "invalid crc in LPT node: crc %hx calc %hx",
+ crc, calc_crc);
dump_stack();
return -EINVAL;
}
@@ -938,14 +938,15 @@ static int check_lpt_crc(void *buf, int len)
*
* This function returns %0 on success and a negative error code on failure.
*/
-static int check_lpt_type(uint8_t **addr, int *pos, int type)
+static int check_lpt_type(const struct ubifs_info *c, uint8_t **addr,
+ int *pos, int type)
{
int node_type;
node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS);
if (node_type != type) {
- ubifs_err("invalid type (%d) in LPT node type %d", node_type,
- type);
+ ubifs_err(c, "invalid type (%d) in LPT node type %d",
+ node_type, type);
dump_stack();
return -EINVAL;
}
@@ -966,7 +967,7 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf,
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int i, pos = 0, err;
- err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE);
+ err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_PNODE);
if (err)
return err;
if (c->big_lpt)
@@ -985,7 +986,7 @@ static int unpack_pnode(const struct ubifs_info *c, void *buf,
lprops->flags = 0;
lprops->flags |= ubifs_categorize_lprops(c, lprops);
}
- err = check_lpt_crc(buf, c->pnode_sz);
+ err = check_lpt_crc(c, buf, c->pnode_sz);
return err;
}
@@ -1003,7 +1004,7 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int i, pos = 0, err;
- err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE);
+ err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_NNODE);
if (err)
return err;
if (c->big_lpt)
@@ -1019,7 +1020,7 @@ int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos,
c->lpt_offs_bits);
}
- err = check_lpt_crc(buf, c->nnode_sz);
+ err = check_lpt_crc(c, buf, c->nnode_sz);
return err;
}
@@ -1035,7 +1036,7 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf)
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int i, pos = 0, err;
- err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB);
+ err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_LTAB);
if (err)
return err;
for (i = 0; i < c->lpt_lebs; i++) {
@@ -1051,7 +1052,7 @@ static int unpack_ltab(const struct ubifs_info *c, void *buf)
c->ltab[i].tgc = 0;
c->ltab[i].cmt = 0;
}
- err = check_lpt_crc(buf, c->ltab_sz);
+ err = check_lpt_crc(c, buf, c->ltab_sz);
return err;
}
@@ -1067,7 +1068,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf)
uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
int i, pos = 0, err;
- err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE);
+ err = check_lpt_type(c, &addr, &pos, UBIFS_LPT_LSAVE);
if (err)
return err;
for (i = 0; i < c->lsave_cnt; i++) {
@@ -1077,7 +1078,7 @@ static int unpack_lsave(const struct ubifs_info *c, void *buf)
return -EINVAL;
c->lsave[i] = lnum;
}
- err = check_lpt_crc(buf, c->lsave_sz);
+ err = check_lpt_crc(c, buf, c->lsave_sz);
return err;
}
@@ -1243,7 +1244,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
return 0;
out:
- ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
+ ubifs_err(c, "error %d reading nnode at %d:%d", err, lnum, offs);
dump_stack();
kfree(nnode);
return err;
@@ -1308,10 +1309,10 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
return 0;
out:
- ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
+ ubifs_err(c, "error %d reading pnode at %d:%d", err, lnum, offs);
ubifs_dump_pnode(c, pnode, parent, iip);
dump_stack();
- ubifs_err("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
+ ubifs_err(c, "calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
kfree(pnode);
return err;
}
@@ -2095,7 +2096,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
int i;
if (pnode->num != col) {
- ubifs_err("pnode num %d expected %d parent num %d iip %d",
+ ubifs_err(c, "pnode num %d expected %d parent num %d iip %d",
pnode->num, col, pnode->parent->num, pnode->iip);
return -EINVAL;
}
@@ -2110,13 +2111,13 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
if (lnum >= c->leb_cnt)
continue;
if (lprops->lnum != lnum) {
- ubifs_err("bad LEB number %d expected %d",
+ ubifs_err(c, "bad LEB number %d expected %d",
lprops->lnum, lnum);
return -EINVAL;
}
if (lprops->flags & LPROPS_TAKEN) {
if (cat != LPROPS_UNCAT) {
- ubifs_err("LEB %d taken but not uncat %d",
+ ubifs_err(c, "LEB %d taken but not uncat %d",
lprops->lnum, cat);
return -EINVAL;
}
@@ -2129,7 +2130,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
case LPROPS_FRDI_IDX:
break;
default:
- ubifs_err("LEB %d index but cat %d",
+ ubifs_err(c, "LEB %d index but cat %d",
lprops->lnum, cat);
return -EINVAL;
}
@@ -2142,7 +2143,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
case LPROPS_FREEABLE:
break;
default:
- ubifs_err("LEB %d not index but cat %d",
+ ubifs_err(c, "LEB %d not index but cat %d",
lprops->lnum, cat);
return -EINVAL;
}
@@ -2183,14 +2184,14 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
break;
}
if (!found) {
- ubifs_err("LEB %d cat %d not found in cat heap/list",
+ ubifs_err(c, "LEB %d cat %d not found in cat heap/list",
lprops->lnum, cat);
return -EINVAL;
}
switch (cat) {
case LPROPS_EMPTY:
if (lprops->free != c->leb_size) {
- ubifs_err("LEB %d cat %d free %d dirty %d",
+ ubifs_err(c, "LEB %d cat %d free %d dirty %d",
lprops->lnum, cat, lprops->free,
lprops->dirty);
return -EINVAL;
@@ -2199,7 +2200,7 @@ static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
case LPROPS_FREEABLE:
case LPROPS_FRDI_IDX:
if (lprops->free + lprops->dirty != c->leb_size) {
- ubifs_err("LEB %d cat %d free %d dirty %d",
+ ubifs_err(c, "LEB %d cat %d free %d dirty %d",
lprops->lnum, cat, lprops->free,
lprops->dirty);
return -EINVAL;
@@ -2236,7 +2237,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
/* cnode is a nnode */
num = calc_nnode_num(row, col);
if (cnode->num != num) {
- ubifs_err("nnode num %d expected %d parent num %d iip %d",
+ ubifs_err(c, "nnode num %d expected %d parent num %d iip %d",
cnode->num, num,
(nnode ? nnode->num : 0), cnode->iip);
return -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index d9c02928e992..ce89bdc3eb02 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -319,7 +319,7 @@ static int layout_cnodes(struct ubifs_info *c)
return 0;
no_space:
- ubifs_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
+ ubifs_err(c, "LPT out of space at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
lnum, offs, len, done_ltab, done_lsave);
ubifs_dump_lpt_info(c);
ubifs_dump_lpt_lebs(c);
@@ -543,7 +543,7 @@ static int write_cnodes(struct ubifs_info *c)
return 0;
no_space:
- ubifs_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
+ ubifs_err(c, "LPT out of space mismatch at LEB %d:%d needing %d, done_ltab %d, done_lsave %d",
lnum, offs, len, done_ltab, done_lsave);
ubifs_dump_lpt_info(c);
ubifs_dump_lpt_lebs(c);
@@ -1638,7 +1638,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
if (!buf) {
- ubifs_err("cannot allocate memory for ltab checking");
+ ubifs_err(c, "cannot allocate memory for ltab checking");
return 0;
}
@@ -1660,18 +1660,18 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
continue;
}
if (!dbg_is_all_ff(p, len)) {
- ubifs_err("invalid empty space in LEB %d at %d",
+ ubifs_err(c, "invalid empty space in LEB %d at %d",
lnum, c->leb_size - len);
err = -EINVAL;
}
i = lnum - c->lpt_first;
if (len != c->ltab[i].free) {
- ubifs_err("invalid free space in LEB %d (free %d, expected %d)",
+ ubifs_err(c, "invalid free space in LEB %d (free %d, expected %d)",
lnum, len, c->ltab[i].free);
err = -EINVAL;
}
if (dirty != c->ltab[i].dirty) {
- ubifs_err("invalid dirty space in LEB %d (dirty %d, expected %d)",
+ ubifs_err(c, "invalid dirty space in LEB %d (dirty %d, expected %d)",
lnum, dirty, c->ltab[i].dirty);
err = -EINVAL;
}
@@ -1725,7 +1725,7 @@ int dbg_check_ltab(struct ubifs_info *c)
for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
err = dbg_check_ltab_lnum(c, lnum);
if (err) {
- ubifs_err("failed at LEB %d", lnum);
+ ubifs_err(c, "failed at LEB %d", lnum);
return err;
}
}
@@ -1757,7 +1757,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
free += c->leb_size;
}
if (free < c->lpt_sz) {
- ubifs_err("LPT space error: free %lld lpt_sz %lld",
+ ubifs_err(c, "LPT space error: free %lld lpt_sz %lld",
free, c->lpt_sz);
ubifs_dump_lpt_info(c);
ubifs_dump_lpt_lebs(c);
@@ -1797,12 +1797,12 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
d->chk_lpt_lebs = 0;
d->chk_lpt_wastage = 0;
if (c->dirty_pn_cnt > c->pnode_cnt) {
- ubifs_err("dirty pnodes %d exceed max %d",
+ ubifs_err(c, "dirty pnodes %d exceed max %d",
c->dirty_pn_cnt, c->pnode_cnt);
err = -EINVAL;
}
if (c->dirty_nn_cnt > c->nnode_cnt) {
- ubifs_err("dirty nnodes %d exceed max %d",
+ ubifs_err(c, "dirty nnodes %d exceed max %d",
c->dirty_nn_cnt, c->nnode_cnt);
err = -EINVAL;
}
@@ -1820,22 +1820,22 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
chk_lpt_sz *= d->chk_lpt_lebs;
chk_lpt_sz += len - c->nhead_offs;
if (d->chk_lpt_sz != chk_lpt_sz) {
- ubifs_err("LPT wrote %lld but space used was %lld",
+ ubifs_err(c, "LPT wrote %lld but space used was %lld",
d->chk_lpt_sz, chk_lpt_sz);
err = -EINVAL;
}
if (d->chk_lpt_sz > c->lpt_sz) {
- ubifs_err("LPT wrote %lld but lpt_sz is %lld",
+ ubifs_err(c, "LPT wrote %lld but lpt_sz is %lld",
d->chk_lpt_sz, c->lpt_sz);
err = -EINVAL;
}
if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) {
- ubifs_err("LPT layout size %lld but wrote %lld",
+ ubifs_err(c, "LPT layout size %lld but wrote %lld",
d->chk_lpt_sz, d->chk_lpt_sz2);
err = -EINVAL;
}
if (d->chk_lpt_sz2 && d->new_nhead_offs != len) {
- ubifs_err("LPT new nhead offs: expected %d was %d",
+ ubifs_err(c, "LPT new nhead offs: expected %d was %d",
d->new_nhead_offs, len);
err = -EINVAL;
}
@@ -1845,7 +1845,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
if (c->big_lpt)
lpt_sz += c->lsave_sz;
if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) {
- ubifs_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
+ ubifs_err(c, "LPT chk_lpt_sz %lld + waste %lld exceeds %lld",
d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz);
err = -EINVAL;
}
@@ -1887,7 +1887,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum);
buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
if (!buf) {
- ubifs_err("cannot allocate memory to dump LPT");
+ ubifs_err(c, "cannot allocate memory to dump LPT");
return;
}
@@ -1962,7 +1962,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
pr_err("LEB %d:%d, lsave len\n", lnum, offs);
break;
default:
- ubifs_err("LPT node type %d not recognized", node_type);
+ ubifs_err(c, "LPT node type %d not recognized", node_type);
goto out;
}
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 1a4bb9e8b3b8..c6a5e39e2ba5 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -82,7 +82,7 @@ out:
return -EUCLEAN;
out_dump:
- ubifs_err("unexpected node type %d master LEB %d:%d",
+ ubifs_err(c, "unexpected node type %d master LEB %d:%d",
snod->type, lnum, snod->offs);
ubifs_scan_destroy(sleb);
return -EINVAL;
@@ -240,7 +240,7 @@ static int validate_master(const struct ubifs_info *c)
return 0;
out:
- ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
+ ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err);
ubifs_dump_node(c, c->mst_node);
return -EINVAL;
}
@@ -316,7 +316,7 @@ int ubifs_read_master(struct ubifs_info *c)
if (c->leb_cnt < old_leb_cnt ||
c->leb_cnt < UBIFS_MIN_LEB_CNT) {
- ubifs_err("bad leb_cnt on master node");
+ ubifs_err(c, "bad leb_cnt on master node");
ubifs_dump_node(c, c->mst_node);
return -EINVAL;
}
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 4409f486ecef..caf2d123e9ee 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -88,7 +88,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
else if (inum > o->inum)
p = &(*p)->rb_right;
else {
- ubifs_err("orphaned twice");
+ ubifs_err(c, "orphaned twice");
spin_unlock(&c->orphan_lock);
kfree(orphan);
return 0;
@@ -155,7 +155,7 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
}
}
spin_unlock(&c->orphan_lock);
- ubifs_err("missing orphan ino %lu", (unsigned long)inum);
+ ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum);
dump_stack();
}
@@ -287,7 +287,7 @@ static int write_orph_node(struct ubifs_info *c, int atomic)
* We limit the number of orphans so that this should
* never happen.
*/
- ubifs_err("out of space in orphan area");
+ ubifs_err(c, "out of space in orphan area");
return -EINVAL;
}
}
@@ -397,7 +397,7 @@ static int consolidate(struct ubifs_info *c)
* We limit the number of orphans so that this should
* never happen.
*/
- ubifs_err("out of space in orphan area");
+ ubifs_err(c, "out of space in orphan area");
err = -EINVAL;
}
spin_unlock(&c->orphan_lock);
@@ -569,7 +569,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
list_for_each_entry(snod, &sleb->nodes, list) {
if (snod->type != UBIFS_ORPH_NODE) {
- ubifs_err("invalid node type %d in orphan area at %d:%d",
+ ubifs_err(c, "invalid node type %d in orphan area at %d:%d",
snod->type, sleb->lnum, snod->offs);
ubifs_dump_node(c, snod->node);
return -EINVAL;
@@ -596,7 +596,7 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
* number. That makes this orphan node, out of date.
*/
if (!first) {
- ubifs_err("out of order commit number %llu in orphan node at %d:%d",
+ ubifs_err(c, "out of order commit number %llu in orphan node at %d:%d",
cmt_no, sleb->lnum, snod->offs);
ubifs_dump_node(c, snod->node);
return -EINVAL;
@@ -831,20 +831,20 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
if (inum != ci->last_ino) {
/* Lowest node type is the inode node, so it comes first */
if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
- ubifs_err("found orphan node ino %lu, type %d",
+ ubifs_err(c, "found orphan node ino %lu, type %d",
(unsigned long)inum, key_type(c, &zbr->key));
ci->last_ino = inum;
ci->tot_inos += 1;
err = ubifs_tnc_read_node(c, zbr, ci->node);
if (err) {
- ubifs_err("node read failed, error %d", err);
+ ubifs_err(c, "node read failed, error %d", err);
return err;
}
if (ci->node->nlink == 0)
/* Must be recorded as an orphan */
if (!dbg_find_check_orphan(&ci->root, inum) &&
!dbg_find_orphan(c, inum)) {
- ubifs_err("missing orphan, ino %lu",
+ ubifs_err(c, "missing orphan, ino %lu",
(unsigned long)inum);
ci->missing += 1;
}
@@ -887,7 +887,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
if (!buf) {
- ubifs_err("cannot allocate memory to check orphans");
+ ubifs_err(c, "cannot allocate memory to check orphans");
return 0;
}
@@ -925,7 +925,7 @@ static int dbg_check_orphans(struct ubifs_info *c)
ci.root = RB_ROOT;
ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
if (!ci.node) {
- ubifs_err("out of memory");
+ ubifs_err(c, "out of memory");
return -ENOMEM;
}
@@ -935,12 +935,12 @@ static int dbg_check_orphans(struct ubifs_info *c)
err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci);
if (err) {
- ubifs_err("cannot scan TNC, error %d", err);
+ ubifs_err(c, "cannot scan TNC, error %d", err);
goto out;
}
if (ci.missing) {
- ubifs_err("%lu missing orphan(s)", ci.missing);
+ ubifs_err(c, "%lu missing orphan(s)", ci.missing);
err = -EINVAL;
goto out;
}
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index c640938f62f0..695fc71d5244 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -305,7 +305,7 @@ int ubifs_recover_master_node(struct ubifs_info *c)
mst = mst2;
}
- ubifs_msg("recovered master node from LEB %d",
+ ubifs_msg(c, "recovered master node from LEB %d",
(mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
@@ -360,13 +360,13 @@ int ubifs_recover_master_node(struct ubifs_info *c)
out_err:
err = -EINVAL;
out_free:
- ubifs_err("failed to recover master node");
+ ubifs_err(c, "failed to recover master node");
if (mst1) {
- ubifs_err("dumping first master node");
+ ubifs_err(c, "dumping first master node");
ubifs_dump_node(c, mst1);
}
if (mst2) {
- ubifs_err("dumping second master node");
+ ubifs_err(c, "dumping second master node");
ubifs_dump_node(c, mst2);
}
vfree(buf2);
@@ -682,7 +682,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
ret, lnum, offs);
break;
} else {
- ubifs_err("unexpected return value %d", ret);
+ ubifs_err(c, "unexpected return value %d", ret);
err = -EINVAL;
goto error;
}
@@ -702,7 +702,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
* See header comment for this file for more
* explanations about the reasons we have this check.
*/
- ubifs_err("corrupt empty space LEB %d:%d, corruption starts at %d",
+ ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d",
lnum, offs, corruption);
/* Make sure we dump interesting non-0xFF data */
offs += corruption;
@@ -788,13 +788,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
corrupted_rescan:
/* Re-scan the corrupted data with verbose messages */
- ubifs_err("corruption %d", ret);
+ ubifs_err(c, "corruption %d", ret);
ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
corrupted:
ubifs_scanned_corruption(c, lnum, offs, buf);
err = -EUCLEAN;
error:
- ubifs_err("LEB %d scanning failed", lnum);
+ ubifs_err(c, "LEB %d scanning failed", lnum);
ubifs_scan_destroy(sleb);
return ERR_PTR(err);
}
@@ -826,15 +826,15 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
goto out_free;
ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
if (ret != SCANNED_A_NODE) {
- ubifs_err("Not a valid node");
+ ubifs_err(c, "Not a valid node");
goto out_err;
}
if (cs_node->ch.node_type != UBIFS_CS_NODE) {
- ubifs_err("Node a CS node, type is %d", cs_node->ch.node_type);
+ ubifs_err(c, "Node a CS node, type is %d", cs_node->ch.node_type);
goto out_err;
}
if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
- ubifs_err("CS node cmt_no %llu != current cmt_no %llu",
+ ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu",
(unsigned long long)le64_to_cpu(cs_node->cmt_no),
c->cmt_no);
goto out_err;
@@ -847,7 +847,7 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
out_err:
err = -EINVAL;
out_free:
- ubifs_err("failed to get CS sqnum");
+ ubifs_err(c, "failed to get CS sqnum");
kfree(cs_node);
return err;
}
@@ -899,7 +899,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
}
}
if (snod->sqnum > cs_sqnum) {
- ubifs_err("unrecoverable log corruption in LEB %d",
+ ubifs_err(c, "unrecoverable log corruption in LEB %d",
lnum);
ubifs_scan_destroy(sleb);
return ERR_PTR(-EUCLEAN);
@@ -975,11 +975,8 @@ int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
return err;
dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
- err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
- if (err)
- return err;
- return 0;
+ return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
}
/**
@@ -1004,10 +1001,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
if (len == 0) {
/* Nothing to read, just unmap it */
- err = ubifs_leb_unmap(c, lnum);
- if (err)
- return err;
- return 0;
+ return ubifs_leb_unmap(c, lnum);
}
err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
@@ -1043,7 +1037,7 @@ static int clean_an_unclean_leb(struct ubifs_info *c,
}
if (ret == SCANNED_EMPTY_SPACE) {
- ubifs_err("unexpected empty space at %d:%d",
+ ubifs_err(c, "unexpected empty space at %d:%d",
lnum, offs);
return -EUCLEAN;
}
@@ -1137,7 +1131,7 @@ static int grab_empty_leb(struct ubifs_info *c)
*/
lnum = ubifs_find_free_leb_for_idx(c);
if (lnum < 0) {
- ubifs_err("could not find an empty LEB");
+ ubifs_err(c, "could not find an empty LEB");
ubifs_dump_lprops(c);
ubifs_dump_budg(c, &c->bi);
return lnum;
@@ -1217,7 +1211,7 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
}
mutex_unlock(&wbuf->io_mutex);
if (err < 0) {
- ubifs_err("GC failed, error %d", err);
+ ubifs_err(c, "GC failed, error %d", err);
if (err == -EAGAIN)
err = -EINVAL;
return err;
@@ -1464,7 +1458,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
return 0;
out:
- ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
+ ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d",
(unsigned long)e->inum, e->i_size, e->d_size, err);
return err;
}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 9b40a1c5e160..3ca4540130b5 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -458,13 +458,13 @@ int ubifs_validate_entry(struct ubifs_info *c,
nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
strnlen(dent->name, nlen) != nlen ||
le64_to_cpu(dent->inum) > MAX_INUM) {
- ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
+ ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ?
"directory entry" : "extended attribute entry");
return -EINVAL;
}
if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
- ubifs_err("bad key type %d", key_type);
+ ubifs_err(c, "bad key type %d", key_type);
return -EINVAL;
}
@@ -589,7 +589,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
cond_resched();
if (snod->sqnum >= SQNUM_WATERMARK) {
- ubifs_err("file system's life ended");
+ ubifs_err(c, "file system's life ended");
goto out_dump;
}
@@ -647,7 +647,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
if (old_size < 0 || old_size > c->max_inode_sz ||
new_size < 0 || new_size > c->max_inode_sz ||
old_size <= new_size) {
- ubifs_err("bad truncation node");
+ ubifs_err(c, "bad truncation node");
goto out_dump;
}
@@ -662,7 +662,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
break;
}
default:
- ubifs_err("unexpected node type %d in bud LEB %d:%d",
+ ubifs_err(c, "unexpected node type %d in bud LEB %d:%d",
snod->type, lnum, snod->offs);
err = -EINVAL;
goto out_dump;
@@ -685,7 +685,7 @@ out:
return err;
out_dump:
- ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
+ ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs);
ubifs_dump_node(c, snod->node);
ubifs_scan_destroy(sleb);
return -EINVAL;
@@ -805,7 +805,7 @@ static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
if (bud) {
if (bud->jhead == jhead && bud->start <= offs)
return 1;
- ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
+ ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs);
return -EINVAL;
}
@@ -861,12 +861,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
* numbers.
*/
if (snod->type != UBIFS_CS_NODE) {
- ubifs_err("first log node at LEB %d:%d is not CS node",
+ ubifs_err(c, "first log node at LEB %d:%d is not CS node",
lnum, offs);
goto out_dump;
}
if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
- ubifs_err("first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
+ ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
lnum, offs,
(unsigned long long)le64_to_cpu(node->cmt_no),
c->cmt_no);
@@ -891,7 +891,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
/* Make sure the first node sits at offset zero of the LEB */
if (snod->offs != 0) {
- ubifs_err("first node is not at zero offset");
+ ubifs_err(c, "first node is not at zero offset");
goto out_dump;
}
@@ -899,12 +899,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
cond_resched();
if (snod->sqnum >= SQNUM_WATERMARK) {
- ubifs_err("file system's life ended");
+ ubifs_err(c, "file system's life ended");
goto out_dump;
}
if (snod->sqnum < c->cs_sqnum) {
- ubifs_err("bad sqnum %llu, commit sqnum %llu",
+ ubifs_err(c, "bad sqnum %llu, commit sqnum %llu",
snod->sqnum, c->cs_sqnum);
goto out_dump;
}
@@ -934,12 +934,12 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
case UBIFS_CS_NODE:
/* Make sure it sits at the beginning of LEB */
if (snod->offs != 0) {
- ubifs_err("unexpected node in log");
+ ubifs_err(c, "unexpected node in log");
goto out_dump;
}
break;
default:
- ubifs_err("unexpected node in log");
+ ubifs_err(c, "unexpected node in log");
goto out_dump;
}
}
@@ -955,7 +955,7 @@ out:
return err;
out_dump:
- ubifs_err("log error detected while replaying the log at LEB %d:%d",
+ ubifs_err(c, "log error detected while replaying the log at LEB %d:%d",
lnum, offs + snod->offs);
ubifs_dump_node(c, snod->node);
ubifs_scan_destroy(sleb);
@@ -1017,7 +1017,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
return free; /* Error code */
if (c->ihead_offs != c->leb_size - free) {
- ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
+ ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum,
c->ihead_offs);
return -EINVAL;
}
@@ -1040,7 +1040,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
* someting went wrong and we cannot proceed mounting
* the file-system.
*/
- ubifs_err("no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted",
+ ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted",
lnum, 0);
err = -EINVAL;
}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 79c6dbbc0e04..f4fbc7b6b794 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -335,7 +335,7 @@ static int create_default_filesystem(struct ubifs_info *c)
if (err)
return err;
- ubifs_msg("default file-system created");
+ ubifs_msg(c, "default file-system created");
return 0;
}
@@ -365,13 +365,13 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
}
if (le32_to_cpu(sup->min_io_size) != c->min_io_size) {
- ubifs_err("min. I/O unit mismatch: %d in superblock, %d real",
+ ubifs_err(c, "min. I/O unit mismatch: %d in superblock, %d real",
le32_to_cpu(sup->min_io_size), c->min_io_size);
goto failed;
}
if (le32_to_cpu(sup->leb_size) != c->leb_size) {
- ubifs_err("LEB size mismatch: %d in superblock, %d real",
+ ubifs_err(c, "LEB size mismatch: %d in superblock, %d real",
le32_to_cpu(sup->leb_size), c->leb_size);
goto failed;
}
@@ -393,33 +393,33 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
- ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
+ ubifs_err(c, "bad LEB count: %d in superblock, %d on UBI volume, %d minimum required",
c->leb_cnt, c->vi.size, min_leb_cnt);
goto failed;
}
if (c->max_leb_cnt < c->leb_cnt) {
- ubifs_err("max. LEB count %d less than LEB count %d",
+ ubifs_err(c, "max. LEB count %d less than LEB count %d",
c->max_leb_cnt, c->leb_cnt);
goto failed;
}
if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
- ubifs_err("too few main LEBs count %d, must be at least %d",
+ ubifs_err(c, "too few main LEBs count %d, must be at least %d",
c->main_lebs, UBIFS_MIN_MAIN_LEBS);
goto failed;
}
max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS;
if (c->max_bud_bytes < max_bytes) {
- ubifs_err("too small journal (%lld bytes), must be at least %lld bytes",
+ ubifs_err(c, "too small journal (%lld bytes), must be at least %lld bytes",
c->max_bud_bytes, max_bytes);
goto failed;
}
max_bytes = (long long)c->leb_size * c->main_lebs;
if (c->max_bud_bytes > max_bytes) {
- ubifs_err("too large journal size (%lld bytes), only %lld bytes available in the main area",
+ ubifs_err(c, "too large journal size (%lld bytes), only %lld bytes available in the main area",
c->max_bud_bytes, max_bytes);
goto failed;
}
@@ -468,7 +468,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
return 0;
failed:
- ubifs_err("bad superblock, error %d", err);
+ ubifs_err(c, "bad superblock, error %d", err);
ubifs_dump_node(c, sup);
return -EINVAL;
}
@@ -549,12 +549,12 @@ int ubifs_read_superblock(struct ubifs_info *c)
ubifs_assert(!c->ro_media || c->ro_mount);
if (!c->ro_mount ||
c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
- ubifs_err("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
+ ubifs_err(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
c->fmt_version, c->ro_compat_version,
UBIFS_FORMAT_VERSION,
UBIFS_RO_COMPAT_VERSION);
if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
- ubifs_msg("only R/O mounting is possible");
+ ubifs_msg(c, "only R/O mounting is possible");
err = -EROFS;
} else
err = -EINVAL;
@@ -570,7 +570,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
}
if (c->fmt_version < 3) {
- ubifs_err("on-flash format version %d is not supported",
+ ubifs_err(c, "on-flash format version %d is not supported",
c->fmt_version);
err = -EINVAL;
goto out;
@@ -595,7 +595,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
c->key_len = UBIFS_SK_LEN;
break;
default:
- ubifs_err("unsupported key format");
+ ubifs_err(c, "unsupported key format");
err = -EINVAL;
goto out;
}
@@ -785,7 +785,7 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
ubifs_assert(c->space_fixup);
ubifs_assert(!c->ro_mount);
- ubifs_msg("start fixing up free space");
+ ubifs_msg(c, "start fixing up free space");
err = fixup_free_space(c);
if (err)
@@ -804,6 +804,6 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
if (err)
return err;
- ubifs_msg("free space fixup complete");
+ ubifs_msg(c, "free space fixup complete");
return err;
}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 89adbc4d08ac..aab87340d3de 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -100,7 +100,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
if (pad_len < 0 ||
offs + node_len + pad_len > c->leb_size) {
if (!quiet) {
- ubifs_err("bad pad node at LEB %d:%d",
+ ubifs_err(c, "bad pad node at LEB %d:%d",
lnum, offs);
ubifs_dump_node(c, pad);
}
@@ -110,7 +110,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
/* Make the node pads to 8-byte boundary */
if ((node_len + pad_len) & 7) {
if (!quiet)
- ubifs_err("bad padding length %d - %d",
+ ubifs_err(c, "bad padding length %d - %d",
offs, offs + node_len + pad_len);
return SCANNED_A_BAD_PAD_NODE;
}
@@ -152,7 +152,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
if (err && err != -EBADMSG) {
- ubifs_err("cannot read %d bytes from LEB %d:%d, error %d",
+ ubifs_err(c, "cannot read %d bytes from LEB %d:%d, error %d",
c->leb_size - offs, lnum, offs, err);
kfree(sleb);
return ERR_PTR(err);
@@ -240,11 +240,11 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
{
int len;
- ubifs_err("corruption at LEB %d:%d", lnum, offs);
+ ubifs_err(c, "corruption at LEB %d:%d", lnum, offs);
len = c->leb_size - offs;
if (len > 8192)
len = 8192;
- ubifs_err("first %d bytes from LEB %d:%d", len, lnum, offs);
+ ubifs_err(c, "first %d bytes from LEB %d:%d", len, lnum, offs);
print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
}
@@ -299,16 +299,16 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
switch (ret) {
case SCANNED_GARBAGE:
- ubifs_err("garbage");
+ ubifs_err(c, "garbage");
goto corrupted;
case SCANNED_A_NODE:
break;
case SCANNED_A_CORRUPT_NODE:
case SCANNED_A_BAD_PAD_NODE:
- ubifs_err("bad node");
+ ubifs_err(c, "bad node");
goto corrupted;
default:
- ubifs_err("unknown");
+ ubifs_err(c, "unknown");
err = -EINVAL;
goto error;
}
@@ -325,7 +325,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
if (offs % c->min_io_size) {
if (!quiet)
- ubifs_err("empty space starts at non-aligned offset %d",
+ ubifs_err(c, "empty space starts at non-aligned offset %d",
offs);
goto corrupted;
}
@@ -338,7 +338,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
for (; len; offs++, buf++, len--)
if (*(uint8_t *)buf != 0xff) {
if (!quiet)
- ubifs_err("corrupt empty space at LEB %d:%d",
+ ubifs_err(c, "corrupt empty space at LEB %d:%d",
lnum, offs);
goto corrupted;
}
@@ -348,14 +348,14 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
corrupted:
if (!quiet) {
ubifs_scanned_corruption(c, lnum, offs, buf);
- ubifs_err("LEB %d scanning failed", lnum);
+ ubifs_err(c, "LEB %d scanning failed", lnum);
}
err = -EUCLEAN;
ubifs_scan_destroy(sleb);
return ERR_PTR(err);
error:
- ubifs_err("LEB %d scanning failed, error %d", lnum, err);
+ ubifs_err(c, "LEB %d scanning failed, error %d", lnum, err);
ubifs_scan_destroy(sleb);
return ERR_PTR(err);
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 93e946561c5c..75e6f04bb795 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -70,13 +70,13 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
const struct ubifs_inode *ui = ubifs_inode(inode);
if (inode->i_size > c->max_inode_sz) {
- ubifs_err("inode is too large (%lld)",
+ ubifs_err(c, "inode is too large (%lld)",
(long long)inode->i_size);
return 1;
}
if (ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
- ubifs_err("unknown compression type %d", ui->compr_type);
+ ubifs_err(c, "unknown compression type %d", ui->compr_type);
return 2;
}
@@ -90,7 +90,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
return 5;
if (!ubifs_compr_present(ui->compr_type)) {
- ubifs_warn("inode %lu uses '%s' compression, but it was not compiled in",
+ ubifs_warn(c, "inode %lu uses '%s' compression, but it was not compiled in",
inode->i_ino, ubifs_compr_name(ui->compr_type));
}
@@ -242,14 +242,14 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
return inode;
out_invalid:
- ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err);
+ ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err);
ubifs_dump_node(c, ino);
ubifs_dump_inode(c, inode);
err = -EINVAL;
out_ino:
kfree(ino);
out:
- ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err);
+ ubifs_err(c, "failed to read inode %lu, error %d", inode->i_ino, err);
iget_failed(inode);
return ERR_PTR(err);
}
@@ -319,7 +319,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (inode->i_nlink) {
err = ubifs_jnl_write_inode(c, inode);
if (err)
- ubifs_err("can't write inode %lu, error %d",
+ ubifs_err(c, "can't write inode %lu, error %d",
inode->i_ino, err);
else
err = dbg_check_inode_size(c, inode, ui->ui_size);
@@ -363,7 +363,7 @@ static void ubifs_evict_inode(struct inode *inode)
* Worst case we have a lost orphan inode wasting space, so a
* simple error message is OK here.
*/
- ubifs_err("can't delete inode %lu, error %d",
+ ubifs_err(c, "can't delete inode %lu, error %d",
inode->i_ino, err);
out:
@@ -492,17 +492,17 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
static int init_constants_early(struct ubifs_info *c)
{
if (c->vi.corrupted) {
- ubifs_warn("UBI volume is corrupted - read-only mode");
+ ubifs_warn(c, "UBI volume is corrupted - read-only mode");
c->ro_media = 1;
}
if (c->di.ro_mode) {
- ubifs_msg("read-only UBI device");
+ ubifs_msg(c, "read-only UBI device");
c->ro_media = 1;
}
if (c->vi.vol_type == UBI_STATIC_VOLUME) {
- ubifs_msg("static UBI volume - read-only mode");
+ ubifs_msg(c, "static UBI volume - read-only mode");
c->ro_media = 1;
}
@@ -516,19 +516,19 @@ static int init_constants_early(struct ubifs_info *c)
c->max_write_shift = fls(c->max_write_size) - 1;
if (c->leb_size < UBIFS_MIN_LEB_SZ) {
- ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
+ ubifs_err(c, "too small LEBs (%d bytes), min. is %d bytes",
c->leb_size, UBIFS_MIN_LEB_SZ);
return -EINVAL;
}
if (c->leb_cnt < UBIFS_MIN_LEB_CNT) {
- ubifs_err("too few LEBs (%d), min. is %d",
+ ubifs_err(c, "too few LEBs (%d), min. is %d",
c->leb_cnt, UBIFS_MIN_LEB_CNT);
return -EINVAL;
}
if (!is_power_of_2(c->min_io_size)) {
- ubifs_err("bad min. I/O size %d", c->min_io_size);
+ ubifs_err(c, "bad min. I/O size %d", c->min_io_size);
return -EINVAL;
}
@@ -539,7 +539,7 @@ static int init_constants_early(struct ubifs_info *c)
if (c->max_write_size < c->min_io_size ||
c->max_write_size % c->min_io_size ||
!is_power_of_2(c->max_write_size)) {
- ubifs_err("bad write buffer size %d for %d min. I/O unit",
+ ubifs_err(c, "bad write buffer size %d for %d min. I/O unit",
c->max_write_size, c->min_io_size);
return -EINVAL;
}
@@ -665,7 +665,7 @@ static int init_constants_sb(struct ubifs_info *c)
tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt;
tmp = ALIGN(tmp, c->min_io_size);
if (tmp > c->leb_size) {
- ubifs_err("too small LEB size %d, at least %d needed",
+ ubifs_err(c, "too small LEB size %d, at least %d needed",
c->leb_size, tmp);
return -EINVAL;
}
@@ -680,7 +680,7 @@ static int init_constants_sb(struct ubifs_info *c)
tmp /= c->leb_size;
tmp += 1;
if (c->log_lebs < tmp) {
- ubifs_err("too small log %d LEBs, required min. %d LEBs",
+ ubifs_err(c, "too small log %d LEBs, required min. %d LEBs",
c->log_lebs, tmp);
return -EINVAL;
}
@@ -772,7 +772,7 @@ static int take_gc_lnum(struct ubifs_info *c)
int err;
if (c->gc_lnum == -1) {
- ubifs_err("no LEB for GC");
+ ubifs_err(c, "no LEB for GC");
return -EINVAL;
}
@@ -857,7 +857,7 @@ static void free_orphans(struct ubifs_info *c)
orph = list_entry(c->orph_list.next, struct ubifs_orphan, list);
list_del(&orph->list);
kfree(orph);
- ubifs_err("orphan list not empty at unmount");
+ ubifs_err(c, "orphan list not empty at unmount");
}
vfree(c->orph_buf);
@@ -954,7 +954,8 @@ static const match_table_t tokens = {
*/
static int parse_standard_option(const char *option)
{
- ubifs_msg("parse %s", option);
+
+ pr_notice("UBIFS: parse %s\n", option);
if (!strcmp(option, "sync"))
return MS_SYNCHRONOUS;
return 0;
@@ -1026,7 +1027,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
else if (!strcmp(name, "zlib"))
c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
else {
- ubifs_err("unknown compressor \"%s\"", name);
+ ubifs_err(c, "unknown compressor \"%s\"", name); //FIXME: is c ready?
kfree(name);
return -EINVAL;
}
@@ -1042,7 +1043,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
flag = parse_standard_option(p);
if (!flag) {
- ubifs_err("unrecognized mount option \"%s\" or missing value",
+ ubifs_err(c, "unrecognized mount option \"%s\" or missing value",
p);
return -EINVAL;
}
@@ -1105,7 +1106,7 @@ again:
}
/* Just disable bulk-read */
- ubifs_warn("cannot allocate %d bytes of memory for bulk-read, disabling it",
+ ubifs_warn(c, "cannot allocate %d bytes of memory for bulk-read, disabling it",
c->max_bu_buf_len);
c->mount_opts.bulk_read = 1;
c->bulk_read = 0;
@@ -1124,7 +1125,7 @@ static int check_free_space(struct ubifs_info *c)
{
ubifs_assert(c->dark_wm > 0);
if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
- ubifs_err("insufficient free space to mount in R/W mode");
+ ubifs_err(c, "insufficient free space to mount in R/W mode");
ubifs_dump_budg(c, &c->bi);
ubifs_dump_lprops(c);
return -ENOSPC;
@@ -1166,14 +1167,14 @@ static int mount_ubifs(struct ubifs_info *c)
* This UBI volume is empty, and read-only, or the file system
* is mounted read-only - we cannot format it.
*/
- ubifs_err("can't format empty UBI volume: read-only %s",
+ ubifs_err(c, "can't format empty UBI volume: read-only %s",
c->ro_media ? "UBI volume" : "mount");
err = -EROFS;
goto out_free;
}
if (c->ro_media && !c->ro_mount) {
- ubifs_err("cannot mount read-write - read-only media");
+ ubifs_err(c, "cannot mount read-write - read-only media");
err = -EROFS;
goto out_free;
}
@@ -1221,7 +1222,7 @@ static int mount_ubifs(struct ubifs_info *c)
* or overridden by mount options is actually compiled in.
*/
if (!ubifs_compr_present(c->default_compr)) {
- ubifs_err("'compressor \"%s\" is not compiled in",
+ ubifs_err(c, "'compressor \"%s\" is not compiled in",
ubifs_compr_name(c->default_compr));
err = -ENOTSUPP;
goto out_free;
@@ -1250,7 +1251,7 @@ static int mount_ubifs(struct ubifs_info *c)
if (IS_ERR(c->bgt)) {
err = PTR_ERR(c->bgt);
c->bgt = NULL;
- ubifs_err("cannot spawn \"%s\", error %d",
+ ubifs_err(c, "cannot spawn \"%s\", error %d",
c->bgt_name, err);
goto out_wbufs;
}
@@ -1264,7 +1265,7 @@ static int mount_ubifs(struct ubifs_info *c)
init_constants_master(c);
if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
- ubifs_msg("recovery needed");
+ ubifs_msg(c, "recovery needed");
c->need_recovery = 1;
}
@@ -1284,7 +1285,7 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_lpt;
}
- if (!c->ro_mount) {
+ if (!c->ro_mount && !c->need_recovery) {
/*
* Set the "dirty" flag so that if we reboot uncleanly we
* will notice this immediately on the next mount.
@@ -1373,10 +1374,10 @@ static int mount_ubifs(struct ubifs_info *c)
if (c->need_recovery) {
if (c->ro_mount)
- ubifs_msg("recovery deferred");
+ ubifs_msg(c, "recovery deferred");
else {
c->need_recovery = 0;
- ubifs_msg("recovery completed");
+ ubifs_msg(c, "recovery completed");
/*
* GC LEB has to be empty and taken at this point. But
* the journal head LEBs may also be accounted as
@@ -1397,20 +1398,20 @@ static int mount_ubifs(struct ubifs_info *c)
c->mounting = 0;
- ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s",
+ ubifs_msg(c, "UBIFS: mounted UBI device %d, volume %d, name \"%s\"%s",
c->vi.ubi_num, c->vi.vol_id, c->vi.name,
c->ro_mount ? ", R/O mode" : "");
x = (long long)c->main_lebs * c->leb_size;
y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
- ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
+ ubifs_msg(c, "LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
c->leb_size, c->leb_size >> 10, c->min_io_size,
c->max_write_size);
- ubifs_msg("FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
+ ubifs_msg(c, "FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
x, x >> 20, c->main_lebs,
y, y >> 20, c->log_lebs + c->max_bud_cnt);
- ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
+ ubifs_msg(c, "reserved for root: %llu bytes (%llu KiB)",
c->report_rp_size, c->report_rp_size >> 10);
- ubifs_msg("media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s",
+ ubifs_msg(c, "media format: w%d/r%d (latest is w%d/r%d), UUID %pUB%s",
c->fmt_version, c->ro_compat_version,
UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION, c->uuid,
c->big_lpt ? ", big LPT model" : ", small LPT model");
@@ -1543,8 +1544,8 @@ static int ubifs_remount_rw(struct ubifs_info *c)
int err, lnum;
if (c->rw_incompat) {
- ubifs_err("the file-system is not R/W-compatible");
- ubifs_msg("on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
+ ubifs_err(c, "the file-system is not R/W-compatible");
+ ubifs_msg(c, "on-flash format version is w%d/r%d, but software only supports up to version w%d/r%d",
c->fmt_version, c->ro_compat_version,
UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
return -EROFS;
@@ -1581,7 +1582,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
}
if (c->need_recovery) {
- ubifs_msg("completing deferred recovery");
+ ubifs_msg(c, "completing deferred recovery");
err = ubifs_write_rcvrd_mst_node(c);
if (err)
goto out;
@@ -1630,7 +1631,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
if (IS_ERR(c->bgt)) {
err = PTR_ERR(c->bgt);
c->bgt = NULL;
- ubifs_err("cannot spawn \"%s\", error %d",
+ ubifs_err(c, "cannot spawn \"%s\", error %d",
c->bgt_name, err);
goto out;
}
@@ -1664,7 +1665,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
if (c->need_recovery) {
c->need_recovery = 0;
- ubifs_msg("deferred recovery completed");
+ ubifs_msg(c, "deferred recovery completed");
} else {
/*
* Do not run the debugging space check if the were doing
@@ -1752,8 +1753,7 @@ static void ubifs_put_super(struct super_block *sb)
int i;
struct ubifs_info *c = sb->s_fs_info;
- ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
- c->vi.vol_id);
+ ubifs_msg(c, "un-mount UBI device %d", c->vi.ubi_num);
/*
* The following asserts are only valid if there has not been a failure
@@ -1809,7 +1809,7 @@ static void ubifs_put_super(struct super_block *sb)
* next mount, so we just print a message and
* continue to unmount normally.
*/
- ubifs_err("failed to write master node, error %d",
+ ubifs_err(c, "failed to write master node, error %d",
err);
} else {
for (i = 0; i < c->jhead_cnt; i++)
@@ -1834,17 +1834,17 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
err = ubifs_parse_options(c, data, 1);
if (err) {
- ubifs_err("invalid or unknown remount parameter");
+ ubifs_err(c, "invalid or unknown remount parameter");
return err;
}
if (c->ro_mount && !(*flags & MS_RDONLY)) {
if (c->ro_error) {
- ubifs_msg("cannot re-mount R/W due to prior errors");
+ ubifs_msg(c, "cannot re-mount R/W due to prior errors");
return -EROFS;
}
if (c->ro_media) {
- ubifs_msg("cannot re-mount R/W - UBI volume is R/O");
+ ubifs_msg(c, "cannot re-mount R/W - UBI volume is R/O");
return -EROFS;
}
err = ubifs_remount_rw(c);
@@ -1852,7 +1852,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
return err;
} else if (!c->ro_mount && (*flags & MS_RDONLY)) {
if (c->ro_error) {
- ubifs_msg("cannot re-mount R/O due to prior errors");
+ ubifs_msg(c, "cannot re-mount R/O due to prior errors");
return -EROFS;
}
ubifs_remount_ro(c);
@@ -2104,8 +2104,8 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
*/
ubi = open_ubi(name, UBI_READONLY);
if (IS_ERR(ubi)) {
- ubifs_err("cannot open \"%s\", error %d",
- name, (int)PTR_ERR(ubi));
+ pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d",
+ current->pid, name, (int)PTR_ERR(ubi));
return ERR_CAST(ubi);
}
@@ -2233,8 +2233,8 @@ static int __init ubifs_init(void)
* UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
*/
if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
- ubifs_err("VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
- (unsigned int)PAGE_CACHE_SIZE);
+ pr_err("UBIFS error (pid %d): VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
+ current->pid, (unsigned int)PAGE_CACHE_SIZE);
return -EINVAL;
}
@@ -2257,7 +2257,8 @@ static int __init ubifs_init(void)
err = register_filesystem(&ubifs_fs_type);
if (err) {
- ubifs_err("cannot register file system, error %d", err);
+ pr_err("UBIFS error (pid %d): cannot register file system, error %d",
+ current->pid, err);
goto out_dbg;
}
return 0;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 6793db0754f6..957f5757f374 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -98,7 +98,7 @@ static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
else if (offs > o->offs)
p = &(*p)->rb_right;
else {
- ubifs_err("old idx added twice!");
+ ubifs_err(c, "old idx added twice!");
kfree(old_idx);
return 0;
}
@@ -447,7 +447,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
if (err) {
- ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
+ ubifs_err(c, "cannot read node type %d from LEB %d:%d, error %d",
type, lnum, offs, err);
return err;
}
@@ -1684,27 +1684,27 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
int err, len;
if (ch->node_type != UBIFS_DATA_NODE) {
- ubifs_err("bad node type (%d but expected %d)",
+ ubifs_err(c, "bad node type (%d but expected %d)",
ch->node_type, UBIFS_DATA_NODE);
goto out_err;
}
err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
if (err) {
- ubifs_err("expected node type %d", UBIFS_DATA_NODE);
+ ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE);
goto out;
}
len = le32_to_cpu(ch->len);
if (len != zbr->len) {
- ubifs_err("bad node length %d, expected %d", len, zbr->len);
+ ubifs_err(c, "bad node length %d, expected %d", len, zbr->len);
goto out_err;
}
/* Make sure the key of the read node is correct */
key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
if (!keys_eq(c, &zbr->key, &key1)) {
- ubifs_err("bad key in node at LEB %d:%d",
+ ubifs_err(c, "bad key in node at LEB %d:%d",
zbr->lnum, zbr->offs);
dbg_tnck(&zbr->key, "looked for key ");
dbg_tnck(&key1, "found node's key ");
@@ -1716,7 +1716,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
out_err:
err = -EINVAL;
out:
- ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs);
+ ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs);
ubifs_dump_node(c, buf);
dump_stack();
return err;
@@ -1741,7 +1741,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
len = bu->zbranch[bu->cnt - 1].offs;
len += bu->zbranch[bu->cnt - 1].len - offs;
if (len > bu->buf_len) {
- ubifs_err("buffer too small %d vs %d", bu->buf_len, len);
+ ubifs_err(c, "buffer too small %d vs %d", bu->buf_len, len);
return -EINVAL;
}
@@ -1757,7 +1757,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
return -EAGAIN;
if (err && err != -EBADMSG) {
- ubifs_err("failed to read from LEB %d:%d, error %d",
+ ubifs_err(c, "failed to read from LEB %d:%d, error %d",
lnum, offs, err);
dump_stack();
dbg_tnck(&bu->key, "key ");
@@ -3313,7 +3313,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
out_dump:
block = key_block(c, key);
- ubifs_err("inode %lu has size %lld, but there are data at offset %lld",
+ ubifs_err(c, "inode %lu has size %lld, but there are data at offset %lld",
(unsigned long)inode->i_ino, size,
((loff_t)block) << UBIFS_BLOCK_SHIFT);
mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 7a205e046776..b45345d701e7 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -53,7 +53,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
br->offs = cpu_to_le32(zbr->offs);
br->len = cpu_to_le32(zbr->len);
if (!zbr->lnum || !zbr->len) {
- ubifs_err("bad ref in znode");
+ ubifs_err(c, "bad ref in znode");
ubifs_dump_znode(c, znode);
if (zbr->znode)
ubifs_dump_znode(c, zbr->znode);
@@ -384,7 +384,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
* Do not print scary warnings if the debugging
* option which forces in-the-gaps is enabled.
*/
- ubifs_warn("out of space");
+ ubifs_warn(c, "out of space");
ubifs_dump_budg(c, &c->bi);
ubifs_dump_lprops(c);
}
@@ -441,7 +441,7 @@ static int layout_in_empty_space(struct ubifs_info *c)
/* Determine the index node position */
if (lnum == -1) {
if (c->ileb_nxt >= c->ileb_cnt) {
- ubifs_err("out of space");
+ ubifs_err(c, "out of space");
return -ENOSPC;
}
lnum = c->ilebs[c->ileb_nxt++];
@@ -855,7 +855,7 @@ static int write_index(struct ubifs_info *c)
br->offs = cpu_to_le32(zbr->offs);
br->len = cpu_to_le32(zbr->len);
if (!zbr->lnum || !zbr->len) {
- ubifs_err("bad ref in znode");
+ ubifs_err(c, "bad ref in znode");
ubifs_dump_znode(c, znode);
if (zbr->znode)
ubifs_dump_znode(c, zbr->znode);
@@ -875,7 +875,7 @@ static int write_index(struct ubifs_info *c)
if (lnum != znode->lnum || offs != znode->offs ||
len != znode->len) {
- ubifs_err("inconsistent znode posn");
+ ubifs_err(c, "inconsistent znode posn");
return -EINVAL;
}
@@ -973,7 +973,7 @@ static int write_index(struct ubifs_info *c)
if (lnum != c->dbg->new_ihead_lnum ||
buf_offs != c->dbg->new_ihead_offs) {
- ubifs_err("inconsistent ihead");
+ ubifs_err(c, "inconsistent ihead");
return -EINVAL;
}
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index f6bf8995c7b1..93f5b7859e6f 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -293,9 +293,9 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
lnum, offs, znode->level, znode->child_cnt);
if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
- ubifs_err("current fanout %d, branch count %d",
+ ubifs_err(c, "current fanout %d, branch count %d",
c->fanout, znode->child_cnt);
- ubifs_err("max levels %d, znode level %d",
+ ubifs_err(c, "max levels %d, znode level %d",
UBIFS_MAX_LEVELS, znode->level);
err = 1;
goto out_dump;
@@ -316,7 +316,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
if (zbr->lnum < c->main_first ||
zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
- ubifs_err("bad branch %d", i);
+ ubifs_err(c, "bad branch %d", i);
err = 2;
goto out_dump;
}
@@ -328,7 +328,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
case UBIFS_XENT_KEY:
break;
default:
- ubifs_err("bad key type at slot %d: %d",
+ ubifs_err(c, "bad key type at slot %d: %d",
i, key_type(c, &zbr->key));
err = 3;
goto out_dump;
@@ -340,17 +340,17 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
type = key_type(c, &zbr->key);
if (c->ranges[type].max_len == 0) {
if (zbr->len != c->ranges[type].len) {
- ubifs_err("bad target node (type %d) length (%d)",
+ ubifs_err(c, "bad target node (type %d) length (%d)",
type, zbr->len);
- ubifs_err("have to be %d", c->ranges[type].len);
+ ubifs_err(c, "have to be %d", c->ranges[type].len);
err = 4;
goto out_dump;
}
} else if (zbr->len < c->ranges[type].min_len ||
zbr->len > c->ranges[type].max_len) {
- ubifs_err("bad target node (type %d) length (%d)",
+ ubifs_err(c, "bad target node (type %d) length (%d)",
type, zbr->len);
- ubifs_err("have to be in range of %d-%d",
+ ubifs_err(c, "have to be in range of %d-%d",
c->ranges[type].min_len,
c->ranges[type].max_len);
err = 5;
@@ -370,12 +370,12 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
cmp = keys_cmp(c, key1, key2);
if (cmp > 0) {
- ubifs_err("bad key order (keys %d and %d)", i, i + 1);
+ ubifs_err(c, "bad key order (keys %d and %d)", i, i + 1);
err = 6;
goto out_dump;
} else if (cmp == 0 && !is_hash_key(c, key1)) {
/* These can only be keys with colliding hash */
- ubifs_err("keys %d and %d are not hashed but equivalent",
+ ubifs_err(c, "keys %d and %d are not hashed but equivalent",
i, i + 1);
err = 7;
goto out_dump;
@@ -386,7 +386,7 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
return 0;
out_dump:
- ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
+ ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
ubifs_dump_node(c, idx);
kfree(idx);
return -EINVAL;
@@ -482,7 +482,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
/* Make sure the key of the read node is correct */
key_read(c, node + UBIFS_KEY_OFFSET, &key1);
if (!keys_eq(c, key, &key1)) {
- ubifs_err("bad key in node at LEB %d:%d",
+ ubifs_err(c, "bad key in node at LEB %d:%d",
zbr->lnum, zbr->offs);
dbg_tnck(key, "looked for key ");
dbg_tnck(&key1, "but found node's key ");
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index bc04b9c69891..de759022f3d6 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -43,15 +43,19 @@
#define UBIFS_VERSION 1
/* Normal UBIFS messages */
-#define ubifs_msg(fmt, ...) pr_notice("UBIFS: " fmt "\n", ##__VA_ARGS__)
+#define ubifs_msg(c, fmt, ...) \
+ pr_notice("UBIFS (ubi%d:%d): " fmt "\n", \
+ (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)
/* UBIFS error messages */
-#define ubifs_err(fmt, ...) \
- pr_err("UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
+#define ubifs_err(c, fmt, ...) \
+ pr_err("UBIFS error (ubi%d:%d pid %d): %s: " fmt "\n", \
+ (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
__func__, ##__VA_ARGS__)
/* UBIFS warning messages */
-#define ubifs_warn(fmt, ...) \
- pr_warn("UBIFS warning (pid %d): %s: " fmt "\n", \
- current->pid, __func__, ##__VA_ARGS__)
+#define ubifs_warn(c, fmt, ...) \
+ pr_warn("UBIFS warning (ubi%d:%d pid %d): %s: " fmt "\n", \
+ (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
+ __func__, ##__VA_ARGS__)
/*
* A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
* object as an argument.
@@ -59,7 +63,7 @@
#define ubifs_errc(c, fmt, ...) \
do { \
if (!(c)->probing) \
- ubifs_err(fmt, ##__VA_ARGS__); \
+ ubifs_err(c, fmt, ##__VA_ARGS__); \
} while (0)
/* UBIFS file system VFS magic number */
@@ -158,7 +162,7 @@
#define WORST_COMPR_FACTOR 2
/*
- * How much memory is needed for a buffer where we comress a data node.
+ * How much memory is needed for a buffer where we compress a data node.
*/
#define COMPRESSED_DATA_NODE_BUF_SZ \
(UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
@@ -664,7 +668,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
* @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
* fields
* @softlimit: soft write-buffer timeout interval
- * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit
+ * @delta: hard and soft timeouts delta (the timer expire interval is @softlimit
* and @softlimit + @delta)
* @timer: write-buffer timer
* @no_timer: non-zero if this write-buffer does not have a timer
@@ -930,9 +934,9 @@ struct ubifs_orphan {
/**
* struct ubifs_mount_opts - UBIFS-specific mount options information.
* @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
- * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable)
+ * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable)
* @chk_data_crc: enable/disable CRC data checking when reading data nodes
- * (%0 default, %1 disabe, %2 enable)
+ * (%0 default, %1 disable, %2 enable)
* @override_compr: override default compressor (%0 - do not override and use
* superblock compressor, %1 - override and use compressor
* specified in @compr_type)
@@ -962,9 +966,9 @@ struct ubifs_mount_opts {
* optimization)
* @nospace_rp: the same as @nospace, but additionally means that even reserved
* pool is full
- * @page_budget: budget for a page (constant, nenver changed after mount)
- * @inode_budget: budget for an inode (constant, nenver changed after mount)
- * @dent_budget: budget for a directory entry (constant, nenver changed after
+ * @page_budget: budget for a page (constant, never changed after mount)
+ * @inode_budget: budget for an inode (constant, never changed after mount)
+ * @dent_budget: budget for a directory entry (constant, never changed after
* mount)
*/
struct ubifs_budg_info {
@@ -1787,10 +1791,10 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/* compressor.c */
int __init ubifs_compressors_init(void);
void ubifs_compressors_exit(void);
-void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
- int *compr_type);
-int ubifs_decompress(const void *buf, int len, void *out, int *out_len,
- int compr_type);
+void ubifs_compress(const struct ubifs_info *c, const void *in_buf, int in_len,
+ void *out_buf, int *out_len, int *compr_type);
+int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len,
+ void *out, int *out_len, int compr_type);
#include "debug.h"
#include "misc.h"
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index a92be244a6fb..3659b1934500 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -108,7 +108,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
.dirtied_ino_d = ALIGN(host_ui->data_len, 8) };
if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) {
- ubifs_err("inode %lu already has too many xattrs (%d), cannot create more",
+ ubifs_err(c, "inode %lu already has too many xattrs (%d), cannot create more",
host->i_ino, host_ui->xattr_cnt);
return -ENOSPC;
}
@@ -120,7 +120,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
*/
names_len = host_ui->xattr_names + host_ui->xattr_cnt + nm->len + 1;
if (names_len > XATTR_LIST_MAX) {
- ubifs_err("cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d",
+ ubifs_err(c, "cannot add one more xattr name to inode %lu, total names length would become %d, max. is %d",
host->i_ino, names_len, XATTR_LIST_MAX);
return -ENOSPC;
}
@@ -288,13 +288,13 @@ static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum)
inode = ubifs_iget(c->vfs_sb, inum);
if (IS_ERR(inode)) {
- ubifs_err("dead extended attribute entry, error %d",
+ ubifs_err(c, "dead extended attribute entry, error %d",
(int)PTR_ERR(inode));
return inode;
}
if (ubifs_inode(inode)->xattr)
return inode;
- ubifs_err("corrupt extended attribute entry");
+ ubifs_err(c, "corrupt extended attribute entry");
iput(inode);
return ERR_PTR(-EINVAL);
}
@@ -412,7 +412,7 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
if (buf) {
/* If @buf is %NULL we are supposed to return the length */
if (ui->data_len > size) {
- ubifs_err("buffer size %zd, xattr len %d",
+ ubifs_err(c, "buffer size %zd, xattr len %d",
size, ui->data_len);
err = -ERANGE;
goto out_iput;
@@ -485,7 +485,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
kfree(pxent);
if (err != -ENOENT) {
- ubifs_err("cannot find next direntry, error %d", err);
+ ubifs_err(c, "cannot find next direntry, error %d", err);
return err;
}
@@ -657,8 +657,10 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
&init_xattrs, 0);
mutex_unlock(&inode->i_mutex);
- if (err)
- ubifs_err("cannot initialize security for inode %lu, error %d",
+ if (err) {
+ struct ubifs_info *c = dentry->i_sb->s_fs_info;
+ ubifs_err(c, "cannot initialize security for inode %lu, error %d",
inode->i_ino, err);
+ }
return err;
}
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1ba2baaf4367..6d6a96b4e73f 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -21,7 +21,6 @@
#include "udfdecl.h"
-#include <linux/buffer_head.h>
#include <linux/bitops.h>
#include "udf_i.h"
@@ -63,15 +62,14 @@ static int __load_block_bitmap(struct super_block *sb,
block_group, nr_groups);
}
- if (bitmap->s_block_bitmap[block_group]) {
+ if (bitmap->s_block_bitmap[block_group])
return block_group;
- } else {
- retval = read_block_bitmap(sb, bitmap, block_group,
- block_group);
- if (retval < 0)
- return retval;
- return block_group;
- }
+
+ retval = read_block_bitmap(sb, bitmap, block_group, block_group);
+ if (retval < 0)
+ return retval;
+
+ return block_group;
}
static inline int load_block_bitmap(struct super_block *sb,
@@ -358,7 +356,6 @@ static void udf_table_free_blocks(struct super_block *sb,
struct kernel_lb_addr eloc;
struct extent_position oepos, epos;
int8_t etype;
- int i;
struct udf_inode_info *iinfo;
mutex_lock(&sbi->s_alloc_mutex);
@@ -425,7 +422,6 @@ static void udf_table_free_blocks(struct super_block *sb,
}
if (epos.bh != oepos.bh) {
- i = -1;
oepos.block = epos.block;
brelse(oepos.bh);
get_bh(epos.bh);
@@ -762,7 +758,7 @@ inline int udf_prealloc_blocks(struct super_block *sb,
uint32_t block_count)
{
struct udf_part_map *map = &UDF_SB(sb)->s_partmaps[partition];
- sector_t allocated;
+ int allocated;
if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
allocated = udf_bitmap_prealloc_blocks(sb,
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 05e90edd1992..541a12b5792d 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -30,7 +30,6 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include "udf_i.h"
#include "udf_sb.h"
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 3e44f575fb9c..c763fda257bf 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -16,7 +16,6 @@
#include <linux/fs.h>
#include <linux/string.h>
-#include <linux/buffer_head.h>
struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
struct udf_fileident_bh *fibh,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 08f3555fbeac..5dadad9960b9 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -33,8 +33,7 @@
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/pagemap.h>
-#include <linux/buffer_head.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "udf_i.h"
#include "udf_sb.h"
@@ -100,8 +99,7 @@ static int udf_adinicb_write_begin(struct file *file,
return 0;
}
-static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter,
+static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
loff_t offset)
{
/* Fallback to buffered I/O. */
@@ -121,21 +119,21 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t retval;
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- int err, pos;
- size_t count = iocb->ki_nbytes;
struct udf_inode_info *iinfo = UDF_I(inode);
+ int err;
mutex_lock(&inode->i_mutex);
+
+ retval = generic_write_checks(iocb, from);
+ if (retval <= 0)
+ goto out;
+
down_write(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- if (file->f_flags & O_APPEND)
- pos = inode->i_size;
- else
- pos = iocb->ki_pos;
+ loff_t end = iocb->ki_pos + iov_iter_count(from);
if (inode->i_sb->s_blocksize <
- (udf_file_entry_alloc_offset(inode) +
- pos + count)) {
+ (udf_file_entry_alloc_offset(inode) + end)) {
err = udf_expand_file_adinicb(inode);
if (err) {
mutex_unlock(&inode->i_mutex);
@@ -143,16 +141,14 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return err;
}
} else {
- if (pos + count > inode->i_size)
- iinfo->i_lenAlloc = pos + count;
- else
- iinfo->i_lenAlloc = inode->i_size;
+ iinfo->i_lenAlloc = max(end, inode->i_size);
up_write(&iinfo->i_data_sem);
}
} else
up_write(&iinfo->i_data_sem);
retval = __generic_file_write_iter(iocb, from);
+out:
mutex_unlock(&inode->i_mutex);
if (retval > 0) {
@@ -240,12 +236,10 @@ static int udf_release_file(struct inode *inode, struct file *filp)
}
const struct file_operations udf_file_operations = {
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
.unlocked_ioctl = udf_ioctl,
.open = generic_file_open,
.mmap = generic_file_mmap,
- .write = new_sync_write,
.write_iter = udf_file_write_iter,
.release = udf_release_file,
.fsync = generic_file_fsync,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index a445d599098d..6afac3d561ac 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -33,12 +33,11 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/pagemap.h>
-#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/slab.h>
#include <linux/crc-itu-t.h>
#include <linux/mpage.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
#include "udf_i.h"
#include "udf_sb.h"
@@ -215,8 +214,7 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
-static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter,
+static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
loff_t offset)
{
struct file *file = iocb->ki_filp;
@@ -225,8 +223,8 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_iter_count(iter);
ssize_t ret;
- ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block);
- if (unlikely(ret < 0 && (rw & WRITE)))
+ ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block);
+ if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE))
udf_write_failed(mapping, offset + count);
return ret;
}
@@ -1637,7 +1635,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
udf_get_lb_pblock(inode->i_sb, &iinfo->i_location, 0));
if (!bh) {
udf_debug("getblk failure\n");
- return -ENOMEM;
+ return -EIO;
}
lock_buffer(bh);
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index c175b4dabc14..71d1c25f360d 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -23,7 +23,6 @@
#include <linux/fs.h>
#include <linux/string.h>
-#include <linux/buffer_head.h>
#include <linux/crc-itu-t.h>
#include "udf_i.h"
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 33b246b82c98..39661977c89c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/buffer_head.h>
#include <linux/sched.h>
#include <linux/crc-itu-t.h>
#include <linux/exportfs.h>
@@ -569,8 +568,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
*(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse =
cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
- if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- mark_inode_dirty(dir);
+ dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
+ mark_inode_dirty(dir);
if (fibh.sbh != fibh.ebh)
brelse(fibh.ebh);
brelse(fibh.sbh);
@@ -683,6 +682,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
cfi.fileCharacteristics |= FID_FILE_CHAR_DIRECTORY;
udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL);
inc_nlink(dir);
+ dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
mark_inode_dirty(dir);
unlock_new_inode(inode);
d_instantiate(dentry, inode);
@@ -1024,6 +1024,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
inc_nlink(inode);
inode->i_ctime = current_fs_time(inode->i_sb);
mark_inode_dirty(inode);
+ dir->i_ctime = dir->i_mtime = current_fs_time(dir->i_sb);
+ mark_inode_dirty(dir);
ihold(inode);
d_instantiate(dentry, inode);
@@ -1127,7 +1129,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
inode_dec_link_count(new_inode);
}
old_dir->i_ctime = old_dir->i_mtime = current_fs_time(old_dir->i_sb);
+ new_dir->i_ctime = new_dir->i_mtime = current_fs_time(new_dir->i_sb);
mark_inode_dirty(old_dir);
+ mark_inode_dirty(new_dir);
if (dir_fi) {
dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location);
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index d6caf01a2097..5f861ed287c3 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -24,7 +24,6 @@
#include <linux/fs.h>
#include <linux/string.h>
-#include <linux/buffer_head.h>
#include <linux/mutex.h>
uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index f169411c4ea0..6299f341967b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -48,7 +48,6 @@
#include <linux/stat.h>
#include <linux/cdrom.h>
#include <linux/nls.h>
-#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index ac10ca939f26..8dfbc4025e2f 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -27,7 +27,6 @@
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/pagemap.h>
-#include <linux/buffer_head.h>
#include "udf_i.h"
static int udf_pc_to_char(struct super_block *sb, unsigned char *from,
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 8a9657d7f7c6..42b8c57795cb 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -22,7 +22,6 @@
#include "udfdecl.h"
#include <linux/fs.h>
#include <linux/mm.h>
-#include <linux/buffer_head.h>
#include "udf_i.h"
#include "udf_sb.h"
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index c84ec010a676..042ddbf110cc 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -35,9 +35,7 @@
const struct file_operations ufs_file_operations = {
.llseek = generic_file_llseek,
- .read = new_sync_read,
.read_iter = generic_file_read_iter,
- .write = new_sync_write,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.open = generic_file_open,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1b8704..1d8eef9cf0f5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,7 +31,6 @@
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
-#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagevec.h>
@@ -1496,7 +1495,6 @@ xfs_end_io_direct_write(
STATIC ssize_t
xfs_vm_direct_IO(
- int rw,
struct kiocb *iocb,
struct iov_iter *iter,
loff_t offset)
@@ -1504,15 +1502,14 @@ xfs_vm_direct_IO(
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct block_device *bdev = xfs_find_bdev_for_inode(inode);
- if (rw & WRITE) {
- return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
- offset, xfs_get_blocks_direct,
+ if (iov_iter_rw(iter) == WRITE) {
+ return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
+ xfs_get_blocks_direct,
xfs_end_io_direct_write, NULL,
DIO_ASYNC_EXTEND);
}
- return __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
- offset, xfs_get_blocks_direct,
- NULL, NULL, 0);
+ return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
+ xfs_get_blocks_direct, NULL, NULL, 0);
}
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce615d12fb44..1f12ad0a8585 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,7 +38,6 @@
#include "xfs_icache.h"
#include "xfs_pnfs.h"
-#include <linux/aio.h>
#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>
@@ -280,7 +279,7 @@ xfs_file_read_iter(
XFS_STATS_INC(xs_read_calls);
- if (unlikely(file->f_flags & O_DIRECT))
+ if (unlikely(iocb->ki_flags & IOCB_DIRECT))
ioflags |= XFS_IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS;
@@ -397,7 +396,8 @@ STATIC int /* error (positive) */
xfs_zero_last_block(
struct xfs_inode *ip,
xfs_fsize_t offset,
- xfs_fsize_t isize)
+ xfs_fsize_t isize,
+ bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
@@ -425,6 +425,7 @@ xfs_zero_last_block(
zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset)
zero_len = offset - isize;
+ *did_zeroing = true;
return xfs_iozero(ip, isize, zero_len);
}
@@ -443,7 +444,8 @@ int /* error (positive) */
xfs_zero_eof(
struct xfs_inode *ip,
xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize) /* current inode size */
+ xfs_fsize_t isize, /* current inode size */
+ bool *did_zeroing)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_zero_fsb;
@@ -465,7 +467,7 @@ xfs_zero_eof(
* We only zero a part of that block so it is handled specially.
*/
if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
- error = xfs_zero_last_block(ip, offset, isize);
+ error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
if (error)
return error;
}
@@ -525,6 +527,7 @@ xfs_zero_eof(
if (error)
return error;
+ *did_zeroing = true;
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
}
@@ -541,18 +544,19 @@ xfs_zero_eof(
*/
STATIC ssize_t
xfs_file_aio_write_checks(
- struct file *file,
- loff_t *pos,
- size_t *count,
+ struct kiocb *iocb,
+ struct iov_iter *from,
int *iolock)
{
+ struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- int error = 0;
+ ssize_t error = 0;
+ size_t count = iov_iter_count(from);
restart:
- error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
- if (error)
+ error = generic_write_checks(iocb, from);
+ if (error <= 0)
return error;
error = xfs_break_layouts(inode, iolock);
@@ -566,14 +570,17 @@ restart:
* iolock shared, we need to update it to exclusive which implies
* having to redo all checks before.
*/
- if (*pos > i_size_read(inode)) {
+ if (iocb->ki_pos > i_size_read(inode)) {
+ bool zero = false;
+
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
+ iov_iter_reexpand(from, count);
goto restart;
}
- error = xfs_zero_eof(ip, *pos, i_size_read(inode));
+ error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
if (error)
return error;
}
@@ -673,10 +680,11 @@ xfs_file_dio_aio_write(
xfs_rw_ilock(ip, iolock);
}
- ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
+ ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
- iov_iter_truncate(from, count);
+ count = iov_iter_count(from);
+ pos = iocb->ki_pos;
if (mapping->nrpages) {
ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@ -729,24 +737,22 @@ xfs_file_buffered_aio_write(
ssize_t ret;
int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
- loff_t pos = iocb->ki_pos;
- size_t count = iov_iter_count(from);
xfs_rw_ilock(ip, iolock);
- ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
+ ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
- iov_iter_truncate(from, count);
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
write_retry:
- trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
- ret = generic_perform_write(file, from, pos);
+ trace_xfs_file_buffered_write(ip, iov_iter_count(from),
+ iocb->ki_pos, 0);
+ ret = generic_perform_write(file, from, iocb->ki_pos);
if (likely(ret >= 0))
- iocb->ki_pos = pos + ret;
+ iocb->ki_pos += ret;
/*
* If we hit a space limit, try to free up some lingering preallocated
@@ -798,7 +804,7 @@ xfs_file_write_iter(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if (unlikely(file->f_flags & O_DIRECT))
+ if (unlikely(iocb->ki_flags & IOCB_DIRECT))
ret = xfs_file_dio_aio_write(iocb, from);
else
ret = xfs_file_buffered_aio_write(iocb, from);
@@ -1381,8 +1387,6 @@ xfs_file_llseek(
const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
- .read = new_sync_read,
- .write = new_sync_write,
.read_iter = xfs_file_read_iter,
.write_iter = xfs_file_write_iter,
.splice_read = xfs_file_splice_read,
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index daafa1f6d260..6163767aa856 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2867,6 +2867,10 @@ xfs_rename(
* Handle RENAME_EXCHANGE flags
*/
if (flags & RENAME_EXCHANGE) {
+ if (target_ip == NULL) {
+ error = -EINVAL;
+ goto error_return;
+ }
error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
target_dp, target_name, target_ip,
&free_list, &first_block, spaceres);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 86cd6b39bed7..a1cd55f3f351 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -384,10 +384,11 @@ enum xfs_prealloc_flags {
XFS_PREALLOC_INVISIBLE = (1 << 4),
};
-int xfs_update_prealloc_flags(struct xfs_inode *,
- enum xfs_prealloc_flags);
-int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
-int xfs_iozero(struct xfs_inode *, loff_t, size_t);
+int xfs_update_prealloc_flags(struct xfs_inode *ip,
+ enum xfs_prealloc_flags flags);
+int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_fsize_t isize, bool *did_zeroing);
+int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
#define IHOLD(ip) \
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d919ad7b16bf..e53a90331422 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -751,6 +751,7 @@ xfs_setattr_size(
int error;
uint lock_flags = 0;
uint commit_flags = 0;
+ bool did_zeroing = false;
trace_xfs_setattr(ip);
@@ -794,20 +795,16 @@ xfs_setattr_size(
return error;
/*
- * Now we can make the changes. Before we join the inode to the
- * transaction, take care of the part of the truncation that must be
- * done without the inode lock. This needs to be done before joining
- * the inode to the transaction, because the inode cannot be unlocked
- * once it is a part of the transaction.
+ * File data changes must be complete before we start the transaction to
+ * modify the inode. This needs to be done before joining the inode to
+ * the transaction because the inode cannot be unlocked once it is a
+ * part of the transaction.
+ *
+ * Start with zeroing any data block beyond EOF that we may expose on
+ * file extension.
*/
if (newsize > oldsize) {
- /*
- * Do the first part of growing a file: zero any data in the
- * last block that is beyond the old EOF. We need to do this
- * before the inode is joined to the transaction to modify
- * i_size.
- */
- error = xfs_zero_eof(ip, newsize, oldsize);
+ error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
if (error)
return error;
}
@@ -817,23 +814,18 @@ xfs_setattr_size(
* any previous writes that are beyond the on disk EOF and the new
* EOF that have not been written out need to be written here. If we
* do not write the data out, we expose ourselves to the null files
- * problem.
- *
- * Only flush from the on disk size to the smaller of the in memory
- * file size or the new size as that's the range we really care about
- * here and prevents waiting for other data not within the range we
- * care about here.
+ * problem. Note that this includes any block zeroing we did above;
+ * otherwise those blocks may not be zeroed after a crash.
*/
- if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
+ if (newsize > ip->i_d.di_size &&
+ (oldsize != ip->i_d.di_size || did_zeroing)) {
error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
ip->i_d.di_size, newsize);
if (error)
return error;
}
- /*
- * Wait for all direct I/O to complete.
- */
+ /* Now wait for all direct I/O to complete. */
inode_dio_wait(inode);
/*
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 4b33ef112400..365dd57ea760 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -300,8 +300,10 @@ xfs_fs_commit_blocks(
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
- if (error)
+ if (error) {
+ xfs_trans_cancel(tp, 0);
goto out_drop_iolock;
+ }
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 53cc2aaf8d2b..fbbb9e62e274 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -836,6 +836,11 @@ xfs_qm_reset_dqcounts(
*/
xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
"xfs_quotacheck");
+ /*
+ * Reset type in case we are reusing group quota file for
+ * project quotas or vice versa
+ */
+ ddq->d_flags = type;
ddq->d_bcount = 0;
ddq->d_icount = 0;
ddq->d_rtbcount = 0;
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 0d4d3590cf85..996a04064894 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -168,10 +168,6 @@ extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t,
uint, struct qc_dqblk *);
extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
struct qc_dqblk *);
-extern int xfs_qm_scall_getqstat(struct xfs_mount *,
- struct fs_quota_stat *);
-extern int xfs_qm_scall_getqstatv(struct xfs_mount *,
- struct fs_quota_statv *);
extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint);
extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 9b965db45800..9a25c9275fb3 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -38,7 +38,6 @@
STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
uint);
-STATIC uint xfs_qm_export_flags(uint);
/*
* Turn off quota accounting and/or enforcement for all udquots and/or
@@ -389,159 +388,6 @@ xfs_qm_scall_quotaon(
return 0;
}
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- * for Q_XGETQSTAT command.
- */
-int
-xfs_qm_scall_getqstat(
- struct xfs_mount *mp,
- struct fs_quota_stat *out)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- struct xfs_inode *uip = NULL;
- struct xfs_inode *gip = NULL;
- struct xfs_inode *pip = NULL;
- bool tempuqip = false;
- bool tempgqip = false;
- bool temppqip = false;
-
- memset(out, 0, sizeof(fs_quota_stat_t));
-
- out->qs_version = FS_QSTAT_VERSION;
- out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
- (XFS_ALL_QUOTA_ACCT|
- XFS_ALL_QUOTA_ENFD));
- uip = q->qi_uquotaip;
- gip = q->qi_gquotaip;
- pip = q->qi_pquotaip;
- if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip) == 0)
- tempuqip = true;
- }
- if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip) == 0)
- tempgqip = true;
- }
- /*
- * Q_XGETQSTAT doesn't have room for both group and project quotas.
- * So, allow the project quota values to be copied out only if
- * there is no group quota information available.
- */
- if (!gip) {
- if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
- 0, 0, &pip) == 0)
- temppqip = true;
- }
- } else
- pip = NULL;
- if (uip) {
- out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
- out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
- out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
- if (tempuqip)
- IRELE(uip);
- }
-
- if (gip) {
- out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
- out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
- out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
- if (tempgqip)
- IRELE(gip);
- }
- if (pip) {
- out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
- out->qs_gquota.qfs_nblks = pip->i_d.di_nblocks;
- out->qs_gquota.qfs_nextents = pip->i_d.di_nextents;
- if (temppqip)
- IRELE(pip);
- }
- out->qs_incoredqs = q->qi_dquots;
- out->qs_btimelimit = q->qi_btimelimit;
- out->qs_itimelimit = q->qi_itimelimit;
- out->qs_rtbtimelimit = q->qi_rtbtimelimit;
- out->qs_bwarnlimit = q->qi_bwarnlimit;
- out->qs_iwarnlimit = q->qi_iwarnlimit;
-
- return 0;
-}
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- * for Q_XGETQSTATV command, to support separate project quota field.
- */
-int
-xfs_qm_scall_getqstatv(
- struct xfs_mount *mp,
- struct fs_quota_statv *out)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- struct xfs_inode *uip = NULL;
- struct xfs_inode *gip = NULL;
- struct xfs_inode *pip = NULL;
- bool tempuqip = false;
- bool tempgqip = false;
- bool temppqip = false;
-
- out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
- (XFS_ALL_QUOTA_ACCT|
- XFS_ALL_QUOTA_ENFD));
- out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
- out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
- out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino;
-
- uip = q->qi_uquotaip;
- gip = q->qi_gquotaip;
- pip = q->qi_pquotaip;
- if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip) == 0)
- tempuqip = true;
- }
- if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip) == 0)
- tempgqip = true;
- }
- if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
- 0, 0, &pip) == 0)
- temppqip = true;
- }
- if (uip) {
- out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
- out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
- if (tempuqip)
- IRELE(uip);
- }
-
- if (gip) {
- out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
- out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
- if (tempgqip)
- IRELE(gip);
- }
- if (pip) {
- out->qs_pquota.qfs_nblks = pip->i_d.di_nblocks;
- out->qs_pquota.qfs_nextents = pip->i_d.di_nextents;
- if (temppqip)
- IRELE(pip);
- }
- out->qs_incoredqs = q->qi_dquots;
- out->qs_btimelimit = q->qi_btimelimit;
- out->qs_itimelimit = q->qi_itimelimit;
- out->qs_rtbtimelimit = q->qi_rtbtimelimit;
- out->qs_bwarnlimit = q->qi_bwarnlimit;
- out->qs_iwarnlimit = q->qi_iwarnlimit;
-
- return 0;
-}
-
#define XFS_QC_MASK \
(QC_LIMIT_MASK | QC_TIMER_MASK | QC_WARNS_MASK)
@@ -873,28 +719,6 @@ out_put:
return error;
}
-STATIC uint
-xfs_qm_export_flags(
- uint flags)
-{
- uint uflags;
-
- uflags = 0;
- if (flags & XFS_UQUOTA_ACCT)
- uflags |= FS_QUOTA_UDQ_ACCT;
- if (flags & XFS_GQUOTA_ACCT)
- uflags |= FS_QUOTA_GDQ_ACCT;
- if (flags & XFS_PQUOTA_ACCT)
- uflags |= FS_QUOTA_PDQ_ACCT;
- if (flags & XFS_UQUOTA_ENFD)
- uflags |= FS_QUOTA_UDQ_ENFD;
- if (flags & XFS_GQUOTA_ENFD)
- uflags |= FS_QUOTA_GDQ_ENFD;
- if (flags & XFS_PQUOTA_ENFD)
- uflags |= FS_QUOTA_PDQ_ENFD;
- return uflags;
-}
-
STATIC int
xfs_dqrele_inode(
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 6923905ab33d..7795e0d01382 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -23,10 +23,81 @@
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_icache.h"
#include "xfs_qm.h"
#include <linux/quota.h>
+static void
+xfs_qm_fill_state(
+ struct qc_type_state *tstate,
+ struct xfs_mount *mp,
+ struct xfs_inode *ip,
+ xfs_ino_t ino)
+{
+ struct xfs_quotainfo *q = mp->m_quotainfo;
+ bool tempqip = false;
+
+ tstate->ino = ino;
+ if (!ip && ino == NULLFSINO)
+ return;
+ if (!ip) {
+ if (xfs_iget(mp, NULL, ino, 0, 0, &ip))
+ return;
+ tempqip = true;
+ }
+ tstate->flags |= QCI_SYSFILE;
+ tstate->blocks = ip->i_d.di_nblocks;
+ tstate->nextents = ip->i_d.di_nextents;
+ tstate->spc_timelimit = q->qi_btimelimit;
+ tstate->ino_timelimit = q->qi_itimelimit;
+ tstate->rt_spc_timelimit = q->qi_rtbtimelimit;
+ tstate->spc_warnlimit = q->qi_bwarnlimit;
+ tstate->ino_warnlimit = q->qi_iwarnlimit;
+ tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit;
+ if (tempqip)
+ IRELE(ip);
+}
+
+/*
+ * Return quota status information, such as enforcements, quota file inode
+ * numbers etc.
+ */
+static int
+xfs_fs_get_quota_state(
+ struct super_block *sb,
+ struct qc_state *state)
+{
+ struct xfs_mount *mp = XFS_M(sb);
+ struct xfs_quotainfo *q = mp->m_quotainfo;
+
+ memset(state, 0, sizeof(*state));
+ if (!XFS_IS_QUOTA_RUNNING(mp))
+ return 0;
+ state->s_incoredqs = q->qi_dquots;
+ if (XFS_IS_UQUOTA_RUNNING(mp))
+ state->s_state[USRQUOTA].flags |= QCI_ACCT_ENABLED;
+ if (XFS_IS_UQUOTA_ENFORCED(mp))
+ state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
+ if (XFS_IS_GQUOTA_RUNNING(mp))
+ state->s_state[GRPQUOTA].flags |= QCI_ACCT_ENABLED;
+ if (XFS_IS_GQUOTA_ENFORCED(mp))
+ state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
+ if (XFS_IS_PQUOTA_RUNNING(mp))
+ state->s_state[PRJQUOTA].flags |= QCI_ACCT_ENABLED;
+ if (XFS_IS_PQUOTA_ENFORCED(mp))
+ state->s_state[PRJQUOTA].flags |= QCI_LIMITS_ENFORCED;
+
+ xfs_qm_fill_state(&state->s_state[USRQUOTA], mp, q->qi_uquotaip,
+ mp->m_sb.sb_uquotino);
+ xfs_qm_fill_state(&state->s_state[GRPQUOTA], mp, q->qi_gquotaip,
+ mp->m_sb.sb_gquotino);
+ xfs_qm_fill_state(&state->s_state[PRJQUOTA], mp, q->qi_pquotaip,
+ mp->m_sb.sb_pquotino);
+ return 0;
+}
+
STATIC int
xfs_quota_type(int type)
{
@@ -40,28 +111,40 @@ xfs_quota_type(int type)
}
}
-STATIC int
-xfs_fs_get_xstate(
+#define XFS_QC_SETINFO_MASK (QC_TIMER_MASK | QC_WARNS_MASK)
+
+/*
+ * Adjust quota timers & warnings
+ */
+static int
+xfs_fs_set_info(
struct super_block *sb,
- struct fs_quota_stat *fqs)
+ int type,
+ struct qc_info *info)
{
- struct xfs_mount *mp = XFS_M(sb);
+ struct xfs_mount *mp = XFS_M(sb);
+ struct qc_dqblk newlim;
+ if (sb->s_flags & MS_RDONLY)
+ return -EROFS;
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- return xfs_qm_scall_getqstat(mp, fqs);
-}
+ if (!XFS_IS_QUOTA_ON(mp))
+ return -ESRCH;
+ if (info->i_fieldmask & ~XFS_QC_SETINFO_MASK)
+ return -EINVAL;
+ if ((info->i_fieldmask & XFS_QC_SETINFO_MASK) == 0)
+ return 0;
-STATIC int
-xfs_fs_get_xstatev(
- struct super_block *sb,
- struct fs_quota_statv *fqs)
-{
- struct xfs_mount *mp = XFS_M(sb);
+ newlim.d_fieldmask = info->i_fieldmask;
+ newlim.d_spc_timer = info->i_spc_timelimit;
+ newlim.d_ino_timer = info->i_ino_timelimit;
+ newlim.d_rt_spc_timer = info->i_rt_spc_timelimit;
+ newlim.d_ino_warns = info->i_ino_warnlimit;
+ newlim.d_spc_warns = info->i_spc_warnlimit;
+ newlim.d_rt_spc_warns = info->i_rt_spc_warnlimit;
- if (!XFS_IS_QUOTA_RUNNING(mp))
- return -ENOSYS;
- return xfs_qm_scall_getqstatv(mp, fqs);
+ return xfs_qm_scall_setqlim(mp, 0, xfs_quota_type(type), &newlim);
}
static unsigned int
@@ -178,8 +261,8 @@ xfs_fs_set_dqblk(
}
const struct quotactl_ops xfs_quotactl_operations = {
- .get_xstatev = xfs_fs_get_xstatev,
- .get_xstate = xfs_fs_get_xstate,
+ .get_state = xfs_fs_get_quota_state,
+ .set_info = xfs_fs_set_info,
.quota_enable = xfs_quota_enable,
.quota_disable = xfs_quota_disable,
.rm_xquota = xfs_fs_rm_xquota,
OpenPOWER on IntegriCloud