Diffstat (limited to 'fs')
-rw-r--r--  fs/affs/affs.h | 22
-rw-r--r--  fs/affs/amigaffs.c | 42
-rw-r--r--  fs/affs/inode.c | 9
-rw-r--r--  fs/affs/namei.c | 95
-rw-r--r--  fs/affs/super.c | 3
-rw-r--r--  fs/afs/dir.c | 14
-rw-r--r--  fs/autofs4/dev-ioctl.c | 2
-rw-r--r--  fs/autofs4/root.c | 17
-rw-r--r--  fs/block_dev.c | 2
-rw-r--r--  fs/btrfs/file.c | 2
-rw-r--r--  fs/buffer.c | 12
-rw-r--r--  fs/ceph/addr.c | 21
-rw-r--r--  fs/ceph/cache.c | 2
-rw-r--r--  fs/ceph/caps.c | 40
-rw-r--r--  fs/ceph/debugfs.c | 2
-rw-r--r--  fs/ceph/dir.c | 32
-rw-r--r--  fs/ceph/export.c | 3
-rw-r--r--  fs/ceph/file.c | 106
-rw-r--r--  fs/ceph/inode.c | 172
-rw-r--r--  fs/ceph/ioctl.c | 4
-rw-r--r--  fs/ceph/mds_client.c | 175
-rw-r--r--  fs/ceph/mds_client.h | 15
-rw-r--r--  fs/ceph/super.c | 9
-rw-r--r--  fs/ceph/super.h | 14
-rw-r--r--  fs/dax.c | 3
-rw-r--r--  fs/direct-io.c | 2
-rw-r--r--  fs/ecryptfs/kthread.c | 2
-rw-r--r--  fs/eventpoll.c | 2
-rw-r--r--  fs/ext4/extents_status.c | 2
-rw-r--r--  fs/ext4/inode.c | 6
-rw-r--r--  fs/ext4/mballoc.c | 2
-rw-r--r--  fs/ext4/move_extent.c | 2
-rw-r--r--  fs/hfs/mdb.c | 2
-rw-r--r--  fs/hfsplus/wrapper.c | 2
-rw-r--r--  fs/iomap.c | 10
-rw-r--r--  fs/jfs/super.c | 4
-rw-r--r--  fs/kernfs/dir.c | 2
-rw-r--r--  fs/kernfs/file.c | 62
-rw-r--r--  fs/kernfs/kernfs-internal.h | 2
-rw-r--r--  fs/lockd/svc.c | 2
-rw-r--r--  fs/mpage.c | 2
-rw-r--r--  fs/ncpfs/sock.c | 4
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c | 2
-rw-r--r--  fs/nfs/callback_xdr.c | 6
-rw-r--r--  fs/nfs/filelayout/filelayout.c | 4
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayout.c | 4
-rw-r--r--  fs/nfs/objlayout/objlayout.c | 2
-rw-r--r--  fs/nfsd/blocklayout.c | 6
-rw-r--r--  fs/nfsd/export.c | 1
-rw-r--r--  fs/nfsd/nfs2acl.c | 1
-rw-r--r--  fs/nfsd/nfs3acl.c | 1
-rw-r--r--  fs/nfsd/nfs3proc.c | 8
-rw-r--r--  fs/nfsd/nfs4callback.c | 19
-rw-r--r--  fs/nfsd/nfs4idmap.c | 8
-rw-r--r--  fs/nfsd/nfs4proc.c | 88
-rw-r--r--  fs/nfsd/nfs4state.c | 12
-rw-r--r--  fs/nfsd/nfs4xdr.c | 29
-rw-r--r--  fs/nfsd/nfscache.c | 2
-rw-r--r--  fs/nfsd/nfsctl.c | 70
-rw-r--r--  fs/nfsd/nfsd.h | 6
-rw-r--r--  fs/nfsd/nfsproc.c | 8
-rw-r--r--  fs/nfsd/nfssvc.c | 16
-rw-r--r--  fs/nfsd/state.h | 1
-rw-r--r--  fs/nfsd/vfs.c | 104
-rw-r--r--  fs/nfsd/vfs.h | 6
-rw-r--r--  fs/nilfs2/alloc.c | 2
-rw-r--r--  fs/nilfs2/btnode.c | 2
-rw-r--r--  fs/nilfs2/btree.c | 4
-rw-r--r--  fs/nilfs2/inode.c | 4
-rw-r--r--  fs/nilfs2/mdt.c | 4
-rw-r--r--  fs/nilfs2/segment.c | 2
-rw-r--r--  fs/ocfs2/aops.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 2
-rw-r--r--  fs/ocfs2/file.c | 2
-rw-r--r--  fs/orangefs/orangefs-utils.c | 4
-rw-r--r--  fs/proc/base.c | 8
-rw-r--r--  fs/proc/kcore.c | 5
-rw-r--r--  fs/proc/task_mmu.c | 4
-rw-r--r--  fs/proc/task_nommu.c | 2
-rw-r--r--  fs/reiserfs/file.c | 2
-rw-r--r--  fs/reiserfs/inode.c | 2
-rw-r--r--  fs/reiserfs/super.c | 2
-rw-r--r--  fs/stat.c | 2
-rw-r--r--  fs/udf/inode.c | 2
-rw-r--r--  fs/userfaultfd.c | 8
-rw-r--r--  fs/xfs/xfs_aops.c | 16
-rw-r--r--  fs/xfs/xfs_file.c | 4
87 files changed, 803 insertions, 614 deletions
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 2f088773f1c0..2f8bab390d13 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -138,9 +138,9 @@ extern int affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh);
extern int affs_remove_header(struct dentry *dentry);
extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
-extern void secs_to_datestamp(time64_t secs, struct affs_date *ds);
-extern umode_t prot_to_mode(u32 prot);
-extern void mode_to_prot(struct inode *inode);
+extern void affs_secs_to_datestamp(time64_t secs, struct affs_date *ds);
+extern umode_t affs_prot_to_mode(u32 prot);
+extern void affs_mode_to_prot(struct inode *inode);
__printf(3, 4)
extern void affs_error(struct super_block *sb, const char *function,
const char *fmt, ...);
@@ -162,6 +162,7 @@ extern void affs_free_bitmap(struct super_block *sb);
/* namei.c */
+extern const struct export_operations affs_export_ops;
extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
extern int affs_unlink(struct inode *dir, struct dentry *dentry);
@@ -178,7 +179,6 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* inode.c */
-extern unsigned long affs_parent_ino(struct inode *dir);
extern struct inode *affs_new_inode(struct inode *dir);
extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
extern void affs_evict_inode(struct inode *inode);
@@ -213,6 +213,12 @@ extern const struct address_space_operations affs_aops_ofs;
extern const struct dentry_operations affs_dentry_operations;
extern const struct dentry_operations affs_intl_dentry_operations;
+static inline bool affs_validblock(struct super_block *sb, int block)
+{
+ return(block >= AFFS_SB(sb)->s_reserved &&
+ block < AFFS_SB(sb)->s_partition_size);
+}
+
static inline void
affs_set_blocksize(struct super_block *sb, int size)
{
@@ -222,7 +228,7 @@ static inline struct buffer_head *
affs_bread(struct super_block *sb, int block)
{
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
+ if (affs_validblock(sb, block))
return sb_bread(sb, block);
return NULL;
}
@@ -230,7 +236,7 @@ static inline struct buffer_head *
affs_getblk(struct super_block *sb, int block)
{
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size)
+ if (affs_validblock(sb, block))
return sb_getblk(sb, block);
return NULL;
}
@@ -239,7 +245,7 @@ affs_getzeroblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
+ if (affs_validblock(sb, block)) {
bh = sb_getblk(sb, block);
lock_buffer(bh);
memset(bh->b_data, 0 , sb->s_blocksize);
@@ -254,7 +260,7 @@ affs_getemptyblk(struct super_block *sb, int block)
{
struct buffer_head *bh;
pr_debug("%s: %d\n", __func__, block);
- if (block >= AFFS_SB(sb)->s_reserved && block < AFFS_SB(sb)->s_partition_size) {
+ if (affs_validblock(sb, block)) {
bh = sb_getblk(sb, block);
wait_on_buffer(bh);
set_buffer_uptodate(bh);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 0ec65c133b93..b573c3b9a328 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -367,7 +367,7 @@ affs_fix_checksum(struct super_block *sb, struct buffer_head *bh)
}
void
-secs_to_datestamp(time64_t secs, struct affs_date *ds)
+affs_secs_to_datestamp(time64_t secs, struct affs_date *ds)
{
u32 days;
u32 minute;
@@ -386,55 +386,55 @@ secs_to_datestamp(time64_t secs, struct affs_date *ds)
}
umode_t
-prot_to_mode(u32 prot)
+affs_prot_to_mode(u32 prot)
{
umode_t mode = 0;
if (!(prot & FIBF_NOWRITE))
- mode |= S_IWUSR;
+ mode |= 0200;
if (!(prot & FIBF_NOREAD))
- mode |= S_IRUSR;
+ mode |= 0400;
if (!(prot & FIBF_NOEXECUTE))
- mode |= S_IXUSR;
+ mode |= 0100;
if (prot & FIBF_GRP_WRITE)
- mode |= S_IWGRP;
+ mode |= 0020;
if (prot & FIBF_GRP_READ)
- mode |= S_IRGRP;
+ mode |= 0040;
if (prot & FIBF_GRP_EXECUTE)
- mode |= S_IXGRP;
+ mode |= 0010;
if (prot & FIBF_OTR_WRITE)
- mode |= S_IWOTH;
+ mode |= 0002;
if (prot & FIBF_OTR_READ)
- mode |= S_IROTH;
+ mode |= 0004;
if (prot & FIBF_OTR_EXECUTE)
- mode |= S_IXOTH;
+ mode |= 0001;
return mode;
}
void
-mode_to_prot(struct inode *inode)
+affs_mode_to_prot(struct inode *inode)
{
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
- if (!(mode & S_IXUSR))
+ if (!(mode & 0100))
prot |= FIBF_NOEXECUTE;
- if (!(mode & S_IRUSR))
+ if (!(mode & 0400))
prot |= FIBF_NOREAD;
- if (!(mode & S_IWUSR))
+ if (!(mode & 0200))
prot |= FIBF_NOWRITE;
- if (mode & S_IXGRP)
+ if (mode & 0010)
prot |= FIBF_GRP_EXECUTE;
- if (mode & S_IRGRP)
+ if (mode & 0040)
prot |= FIBF_GRP_READ;
- if (mode & S_IWGRP)
+ if (mode & 0020)
prot |= FIBF_GRP_WRITE;
- if (mode & S_IXOTH)
+ if (mode & 0001)
prot |= FIBF_OTR_EXECUTE;
- if (mode & S_IROTH)
+ if (mode & 0004)
prot |= FIBF_OTR_READ;
- if (mode & S_IWOTH)
+ if (mode & 0002)
prot |= FIBF_OTR_WRITE;
AFFS_I(inode)->i_protect = prot;
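
The hunks above replace the symbolic mode macros with their octal values. A minimal userspace sketch (assuming only standard <sys/stat.h>) showing the two spellings are numerically identical, so the conversion is behaviour-neutral:

/* Sketch only -- not part of the patch. Verifies that the octal literals
 * now used in affs_prot_to_mode()/affs_mode_to_prot() equal the POSIX
 * mode macros they replaced. */
#include <assert.h>
#include <sys/stat.h>

int main(void)
{
        assert(S_IRUSR == 0400 && S_IWUSR == 0200 && S_IXUSR == 0100);
        assert(S_IRGRP == 0040 && S_IWGRP == 0020 && S_IXGRP == 0010);
        assert(S_IROTH == 0004 && S_IWOTH == 0002 && S_IXOTH == 0001);
        return 0;
}
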
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index fe4e1290dbb5..a5e6097eb5a9 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -69,7 +69,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
if (affs_test_opt(sbi->s_flags, SF_SETMODE))
inode->i_mode = sbi->s_mode;
else
- inode->i_mode = prot_to_mode(prot);
+ inode->i_mode = affs_prot_to_mode(prot);
id = be16_to_cpu(tail->uid);
if (id == 0 || affs_test_opt(sbi->s_flags, SF_SETUID))
@@ -184,11 +184,12 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
}
tail = AFFS_TAIL(sb, bh);
if (tail->stype == cpu_to_be32(ST_ROOT)) {
- secs_to_datestamp(inode->i_mtime.tv_sec,&AFFS_ROOT_TAIL(sb, bh)->root_change);
+ affs_secs_to_datestamp(inode->i_mtime.tv_sec,
+ &AFFS_ROOT_TAIL(sb, bh)->root_change);
} else {
tail->protect = cpu_to_be32(AFFS_I(inode)->i_protect);
tail->size = cpu_to_be32(inode->i_size);
- secs_to_datestamp(inode->i_mtime.tv_sec,&tail->change);
+ affs_secs_to_datestamp(inode->i_mtime.tv_sec, &tail->change);
if (!(inode->i_ino == AFFS_SB(sb)->s_root_block)) {
uid = i_uid_read(inode);
gid = i_gid_read(inode);
@@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
mark_inode_dirty(inode);
if (attr->ia_valid & ATTR_MODE)
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
out:
return error;
}
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 29186d29a3b6..96dd1d09a273 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -9,29 +9,10 @@
*/
#include "affs.h"
+#include <linux/exportfs.h>
typedef int (*toupper_t)(int);
-static int affs_toupper(int ch);
-static int affs_hash_dentry(const struct dentry *, struct qstr *);
-static int affs_compare_dentry(const struct dentry *dentry,
- unsigned int len, const char *str, const struct qstr *name);
-static int affs_intl_toupper(int ch);
-static int affs_intl_hash_dentry(const struct dentry *, struct qstr *);
-static int affs_intl_compare_dentry(const struct dentry *dentry,
- unsigned int len, const char *str, const struct qstr *name);
-
-const struct dentry_operations affs_dentry_operations = {
- .d_hash = affs_hash_dentry,
- .d_compare = affs_compare_dentry,
-};
-
-const struct dentry_operations affs_intl_dentry_operations = {
- .d_hash = affs_intl_hash_dentry,
- .d_compare = affs_intl_compare_dentry,
-};
-
-
/* Simple toupper() for DOS\1 */
static int
@@ -271,7 +252,7 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
return -ENOSPC;
inode->i_mode = mode;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
mark_inode_dirty(inode);
inode->i_op = &affs_file_inode_operations;
@@ -301,7 +282,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return -ENOSPC;
inode->i_mode = S_IFDIR | mode;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
inode->i_op = &affs_dir_inode_operations;
inode->i_fop = &affs_dir_operations;
@@ -347,7 +328,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
inode_nohighmem(inode);
inode->i_data.a_ops = &affs_symlink_aops;
inode->i_mode = S_IFLNK | 0777;
- mode_to_prot(inode);
+ affs_mode_to_prot(inode);
error = -EIO;
bh = affs_bread(sb, inode->i_ino);
@@ -465,3 +446,71 @@ done:
affs_brelse(bh);
return retval;
}
+
+static struct dentry *affs_get_parent(struct dentry *child)
+{
+ struct inode *parent;
+ struct buffer_head *bh;
+
+ bh = affs_bread(child->d_sb, d_inode(child)->i_ino);
+ if (!bh)
+ return ERR_PTR(-EIO);
+
+ parent = affs_iget(child->d_sb,
+ be32_to_cpu(AFFS_TAIL(child->d_sb, bh)->parent));
+ brelse(bh);
+ if (IS_ERR(parent))
+ return ERR_CAST(parent);
+
+ return d_obtain_alias(parent);
+}
+
+static struct inode *affs_nfs_get_inode(struct super_block *sb, u64 ino,
+ u32 generation)
+{
+ struct inode *inode;
+
+ if (!affs_validblock(sb, ino))
+ return ERR_PTR(-ESTALE);
+
+ inode = affs_iget(sb, ino);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
+
+ if (generation && inode->i_generation != generation) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+
+ return inode;
+}
+
+static struct dentry *affs_fh_to_dentry(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
+ affs_nfs_get_inode);
+}
+
+static struct dentry *affs_fh_to_parent(struct super_block *sb, struct fid *fid,
+ int fh_len, int fh_type)
+{
+ return generic_fh_to_parent(sb, fid, fh_len, fh_type,
+ affs_nfs_get_inode);
+}
+
+const struct export_operations affs_export_ops = {
+ .fh_to_dentry = affs_fh_to_dentry,
+ .fh_to_parent = affs_fh_to_parent,
+ .get_parent = affs_get_parent,
+};
+
+const struct dentry_operations affs_dentry_operations = {
+ .d_hash = affs_hash_dentry,
+ .d_compare = affs_compare_dentry,
+};
+
+const struct dentry_operations affs_intl_dentry_operations = {
+ .d_hash = affs_intl_hash_dentry,
+ .d_compare = affs_intl_compare_dentry,
+};
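
With the export_operations above (hooked up via s_export_op in super.c below), an AFFS mount can hand out NFS file handles. A hedged userspace sketch that exercises the same fh_to_dentry path locally through the file-handle syscalls; it needs CAP_DAC_READ_SEARCH, error handling is trimmed, and the file/mount-point arguments are placeholders:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch: encode a handle for argv[1], then re-open it relative to the
 * mount point argv[2]; on an affs mount this ends up in
 * affs_fh_to_dentry() -> affs_nfs_get_inode() above, and a stale
 * generation number comes back as ESTALE. */
int main(int argc, char **argv)
{
        struct file_handle *fh;
        int mount_id, mount_fd, fd;

        if (argc != 3)
                return 2;

        fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
        fh->handle_bytes = MAX_HANDLE_SZ;

        if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0))
                return 1;

        mount_fd = open(argv[2], O_RDONLY | O_DIRECTORY);
        fd = open_by_handle_at(mount_fd, fh, O_RDONLY);
        printf("re-opened by handle: fd=%d\n", fd);
        return fd < 0;
}
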
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d6384863192c..37532538e8ab 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -32,7 +32,7 @@ affs_commit_super(struct super_block *sb, int wait)
struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
lock_buffer(bh);
- secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
+ affs_secs_to_datestamp(ktime_get_real_seconds(), &tail->disk_change);
affs_fix_checksum(sb, bh);
unlock_buffer(bh);
@@ -507,6 +507,7 @@ got_root:
return -ENOMEM;
}
+ sb->s_export_op = &affs_export_ops;
pr_debug("s_flags=%lX\n", sb->s_flags);
return 0;
}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 51a241e09fbb..949f960337f5 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -252,7 +252,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
/* skip entries marked unused in the bitmap */
if (!(block->pagehdr.bitmap[offset / 8] &
(1 << (offset % 8)))) {
- _debug("ENT[%Zu.%u]: unused",
+ _debug("ENT[%zu.%u]: unused",
blkoff / sizeof(union afs_dir_block), offset);
if (offset >= curr)
ctx->pos = blkoff +
@@ -266,7 +266,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
sizeof(*block) -
offset * sizeof(union afs_dirent));
- _debug("ENT[%Zu.%u]: %s %Zu \"%s\"",
+ _debug("ENT[%zu.%u]: %s %zu \"%s\"",
blkoff / sizeof(union afs_dir_block), offset,
(offset < curr ? "skip" : "fill"),
nlen, dire->u.name);
@@ -274,23 +274,23 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
/* work out where the next possible entry is */
for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_dirent)) {
if (next >= AFS_DIRENT_PER_BLOCK) {
- _debug("ENT[%Zu.%u]:"
+ _debug("ENT[%zu.%u]:"
" %u travelled beyond end dir block"
- " (len %u/%Zu)",
+ " (len %u/%zu)",
blkoff / sizeof(union afs_dir_block),
offset, next, tmp, nlen);
return -EIO;
}
if (!(block->pagehdr.bitmap[next / 8] &
(1 << (next % 8)))) {
- _debug("ENT[%Zu.%u]:"
- " %u unmarked extension (len %u/%Zu)",
+ _debug("ENT[%zu.%u]:"
+ " %u unmarked extension (len %u/%zu)",
blkoff / sizeof(union afs_dir_block),
offset, next, tmp, nlen);
return -EIO;
}
- _debug("ENT[%Zu.%u]: ext %u/%Zu",
+ _debug("ENT[%zu.%u]: ext %u/%zu",
blkoff / sizeof(union afs_dir_block),
next, tmp, nlen);
next++;
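
The afs/dir.c hunks only touch format strings: %Zu was a nonstandard kernel/glibc length modifier for size_t that newer compilers warn about, while %zu is the C99 form. A minimal standalone illustration:

/* Sketch: %zu is the portable (C99) conversion for size_t; the old %Zu
 * spelling being removed above triggers -Wformat warnings on current gcc. */
#include <stdio.h>
#include <stddef.h>

int main(void)
{
        size_t n = sizeof(long);
        printf("entry size %zu bytes\n", n);
        return 0;
}
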
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 6f48d670c941..806df746f1a9 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -38,8 +38,6 @@
* which have been left busy at at service shutdown.
*/
-#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl)
-
typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *,
struct autofs_dev_ioctl *);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 82e8f6edfb48..d79ced925861 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -281,8 +281,8 @@ static int autofs4_mount_wait(const struct path *path, bool rcu_walk)
pr_debug("waiting for mount name=%pd\n", path->dentry);
status = autofs4_wait(sbi, path, NFY_MOUNT);
pr_debug("mount wait done status=%d\n", status);
+ ino->last_used = jiffies;
}
- ino->last_used = jiffies;
return status;
}
@@ -321,16 +321,21 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
*/
if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
- struct autofs_info *ino;
struct dentry *new;
new = d_lookup(parent, &dentry->d_name);
if (!new)
return NULL;
- ino = autofs4_dentry_ino(new);
- ino->last_used = jiffies;
- dput(path->dentry);
- path->dentry = new;
+ if (new == dentry)
+ dput(new);
+ else {
+ struct autofs_info *ino;
+
+ ino = autofs4_dentry_ino(new);
+ ino->last_used = jiffies;
+ dput(path->dentry);
+ path->dentry = new;
+ }
}
return path->dentry;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1c62845a72c7..77c30f15a02c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -989,7 +989,7 @@ struct block_device *bdget(dev_t dev)
bdev->bd_super = NULL;
bdev->bd_inode = inode;
bdev->bd_bdi = &noop_backing_dev_info;
- bdev->bd_block_size = (1 << inode->i_blkbits);
+ bdev->bd_block_size = i_blocksize(inode);
bdev->bd_part_count = 0;
bdev->bd_invalidated = 0;
inode->i_mode = S_IFBLK;
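
Several files in this series (block_dev.c above, then btrfs, buffer.c, ceph and others) switch from open-coding "1 << inode->i_blkbits" to the i_blocksize() helper. Roughly what that helper looks like, as a hedged sketch of the include/linux/fs.h definition (the exact upstream wording may differ slightly):

/* Sketch of the helper these hunks adopt; behaviourally identical to the
 * open-coded shift it replaces, just more readable and harder to get
 * wrong at each call site. */
static inline unsigned int i_blocksize(const struct inode *node)
{
        return 1U << node->i_blkbits;
}
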
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 18e5146df864..c1d2a07205da 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2875,7 +2875,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (!ret)
ret = btrfs_prealloc_file_range(inode, mode,
range->start,
- range->len, 1 << inode->i_blkbits,
+ range->len, i_blocksize(inode),
offset + len, &alloc_hint);
else
btrfs_free_reserved_data_space(inode, range->start,
diff --git a/fs/buffer.c b/fs/buffer.c
index 0e87401cf335..28484b3ebc98 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2395,7 +2395,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
loff_t pos, loff_t *bytes)
{
struct inode *inode = mapping->host;
- unsigned blocksize = 1 << inode->i_blkbits;
+ unsigned int blocksize = i_blocksize(inode);
struct page *page;
void *fsdata;
pgoff_t index, curidx;
@@ -2475,8 +2475,8 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
get_block_t *get_block, loff_t *bytes)
{
struct inode *inode = mapping->host;
- unsigned blocksize = 1 << inode->i_blkbits;
- unsigned zerofrom;
+ unsigned int blocksize = i_blocksize(inode);
+ unsigned int zerofrom;
int err;
err = cont_expand_zero(file, mapping, pos, bytes);
@@ -2838,7 +2838,7 @@ int nobh_truncate_page(struct address_space *mapping,
struct buffer_head map_bh;
int err;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
@@ -2916,7 +2916,7 @@ int block_truncate_page(struct address_space *mapping,
struct buffer_head *bh;
int err;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
@@ -3028,7 +3028,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
struct inode *inode = mapping->host;
tmp.b_state = 0;
tmp.b_blocknr = 0;
- tmp.b_size = 1 << inode->i_blkbits;
+ tmp.b_size = i_blocksize(inode);
get_block(inode, block, &tmp, 0);
return tmp.b_blocknr;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 09860c0ec7c1..f297a9e18642 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -391,6 +391,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
nr_pages = i;
if (nr_pages > 0) {
len = nr_pages << PAGE_SHIFT;
+ osd_req_op_extent_update(req, 0, len);
break;
}
goto out_pages;
@@ -751,7 +752,7 @@ static int ceph_writepages_start(struct address_space *mapping,
struct pagevec pvec;
int done = 0;
int rc = 0;
- unsigned wsize = 1 << inode->i_blkbits;
+ unsigned int wsize = i_blocksize(inode);
struct ceph_osd_request *req = NULL;
int do_sync = 0;
loff_t snap_size, i_size;
@@ -771,7 +772,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
@@ -1017,8 +1018,7 @@ new_request:
&ci->i_layout, vino,
offset, &len, 0, num_ops,
CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq,
truncate_size, false);
if (IS_ERR(req)) {
@@ -1028,8 +1028,7 @@ new_request:
min(num_ops,
CEPH_OSD_SLAB_OPS),
CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq,
truncate_size, true);
BUG_ON(IS_ERR(req));
@@ -1194,7 +1193,7 @@ static int ceph_update_writeable_page(struct file *file,
int r;
struct ceph_snap_context *snapc, *oldest;
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
unlock_page(page);
return -EIO;
@@ -1681,8 +1680,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 0, 1,
- CEPH_OSD_OP_CREATE,
- CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE,
NULL, 0, 0, false);
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1699,8 +1697,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 1, 3,
- CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
NULL, ci->i_truncate_seq,
ci->i_truncate_size, false);
if (IS_ERR(req)) {
@@ -1873,7 +1870,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
goto out_unlock;
}
- wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK;
+ wr_req->r_flags = CEPH_OSD_FLAG_WRITE;
osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
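
The ACCESS_ONCE() -> READ_ONCE() conversions that recur through the ceph hunks swap one marked access for another: both force the compiler to emit a single, non-torn load of a value other CPUs may be updating (here fsc->mount_state), but READ_ONCE() also copes with non-scalar types, which is why ACCESS_ONCE() was retired. A minimal userspace analogue of the core idea (the real macro lives in include/linux/compiler.h and does more):

/* Sketch: a volatile cast is the heart of READ_ONCE() -- it pins exactly
 * one load and stops the compiler from caching or re-reading the value. */
#define READ_ONCE_SKETCH(x)     (*(const volatile __typeof__(x) *)&(x))

static int mount_state;                 /* updated from another thread */

static int is_forced_umount(void)
{
        return READ_ONCE_SKETCH(mount_state) == 4 /* e.g. SHUTDOWN */;
}
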
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 5bc5d37b1217..4e7421caf380 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -234,7 +234,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
inode);
if (fscache_cookie_enabled(ci->fscache)) {
- dout("fscache_file_set_cookie %p %p enabing cache\n",
+ dout("fscache_file_set_cookie %p %p enabling cache\n",
inode, filp);
}
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 94fd76d04683..cd966f276a8d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
/*
* Return caps we have registered with the MDS(s) as 'wanted'.
*/
-int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
+int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
{
struct ceph_cap *cap;
struct rb_node *p;
@@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
- if (!__cap_is_valid(cap))
+ if (check && !__cap_is_valid(cap))
continue;
if (cap == ci->i_auth_cap)
mds_wanted |= cap->mds_wanted;
@@ -1184,6 +1184,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
delayed = 1;
}
ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
+ if (want & ~cap->mds_wanted) {
+ /* user space may open/close single file frequently.
+ * This avoids droping mds_wanted immediately after
+ * requesting new mds_wanted.
+ */
+ __cap_set_timeouts(mdsc, ci);
+ }
cap->issued &= retain; /* drop bits we don't want */
if (cap->implemented & ~cap->issued) {
@@ -2084,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
- ceph_sync_write_wait(inode);
-
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret < 0)
goto out;
@@ -2477,23 +2482,22 @@ again:
if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
int mds_wanted;
- if (ACCESS_ONCE(mdsc->fsc->mount_state) ==
+ if (READ_ONCE(mdsc->fsc->mount_state) ==
CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
*err = -EIO;
ret = 1;
goto out_unlock;
}
- mds_wanted = __ceph_caps_mds_wanted(ci);
- if ((mds_wanted & need) != need) {
+ mds_wanted = __ceph_caps_mds_wanted(ci, false);
+ if (need & ~(mds_wanted & need)) {
dout("get_cap_refs %p caps were dropped"
" (session killed?)\n", inode);
*err = -ESTALE;
ret = 1;
goto out_unlock;
}
- if ((mds_wanted & file_wanted) ==
- (file_wanted & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR)))
+ if (!(file_wanted & ~mds_wanted))
ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
}
@@ -3404,6 +3408,7 @@ retry:
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
ci->i_auth_cap = tcap;
+
if (!list_empty(&ci->i_cap_flush_list) &&
ci->i_auth_cap == tcap) {
spin_lock(&mdsc->cap_dirty_lock);
@@ -3417,9 +3422,18 @@ retry:
} else if (tsession) {
/* add placeholder for the export tagert */
int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
+ tcap = new_cap;
ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
+ if (!list_empty(&ci->i_cap_flush_list) &&
+ ci->i_auth_cap == tcap) {
+ spin_lock(&mdsc->cap_dirty_lock);
+ list_move_tail(&ci->i_flushing_item,
+ &tcap->session->s_cap_flushing);
+ spin_unlock(&mdsc->cap_dirty_lock);
+ }
+
__ceph_remove_cap(cap, false);
goto out_unlock;
}
@@ -3924,9 +3938,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
}
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
+ struct inode *dir,
int mds, int drop, int unless)
{
- struct inode *dir = d_inode(dentry->d_parent);
+ struct dentry *parent = NULL;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
@@ -3941,9 +3956,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
+ if (!dir) {
+ parent = dget(dentry->d_parent);
+ dir = d_inode(parent);
+ }
spin_unlock(&dentry->d_lock);
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
+ dput(parent);
spin_lock(&dentry->d_lock);
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 39ff678e567f..f2ae393e2c31 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -70,7 +70,7 @@ static int mdsc_show(struct seq_file *s, void *p)
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
seq_puts(s, "\t(unsafe)");
else
seq_puts(s, "\t");
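
From here on, per-request booleans such as r_got_unsafe, r_aborted and r_did_prepopulate are folded into a single r_req_flags word queried with test_bit()/set_bit(). A hedged sketch of that pattern; the names and bit numbers below are illustrative, not the actual CEPH_MDS_R_* values from fs/ceph/mds_client.h:

#include <linux/bitops.h>
#include <linux/types.h>

#define EXAMPLE_R_GOT_UNSAFE    1       /* illustrative bit numbers only */
#define EXAMPLE_R_ABORTED       2

struct example_request {
        unsigned long r_req_flags;      /* replaces several bool fields */
};

/* set_bit()/test_bit() are atomic, so independent flags can be updated
 * concurrently without a lock around the whole structure. */
static void example_mark_unsafe(struct example_request *req)
{
        set_bit(EXAMPLE_R_GOT_UNSAFE, &req->r_req_flags);
}

static bool example_aborted(struct example_request *req)
{
        return test_bit(EXAMPLE_R_ABORTED, &req->r_req_flags);
}
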
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 8ab1fdf0bd49..3e9ad501addf 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -371,7 +371,7 @@ more:
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
- req->r_direct_is_hash = true;
+ __set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
if (fi->last_name) {
req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
if (!req->r_path2) {
@@ -417,7 +417,7 @@ more:
fi->frag = frag;
fi->last_readdir = req;
- if (req->r_did_prepopulate) {
+ if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
fi->readdir_cache_idx = req->r_readdir_cache_idx;
if (fi->readdir_cache_idx < 0) {
/* preclude from marking dir ordered */
@@ -752,7 +752,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = cpu_to_le32(mask);
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc, NULL, req);
err = ceph_handle_snapdir(req, dentry, err);
dentry = ceph_finish_lookup(req, dentry, err);
@@ -813,7 +814,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mknod.mode = cpu_to_le32(mode);
req->r_args.mknod.rdev = cpu_to_le32(rdev);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -864,7 +866,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
ceph_mdsc_put_request(req);
goto out;
}
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -913,7 +916,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mkdir.mode = cpu_to_le32(mode);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -957,7 +961,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_SHARED on source inode (mds will lock it) */
@@ -1023,7 +1028,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- req->r_locked_dir = dir;
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_inode_drop = drop_caps_for_unlink(inode);
@@ -1066,7 +1072,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
req->r_old_dentry_dir = old_dir;
- req->r_locked_dir = new_dir;
+ req->r_parent = new_dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
@@ -1194,7 +1201,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
struct inode *dir;
if (flags & LOOKUP_RCU) {
- parent = ACCESS_ONCE(dentry->d_parent);
+ parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
return -ECHILD;
@@ -1237,11 +1244,12 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
op = ceph_snap(dir) == CEPH_SNAPDIR ?
- CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR;
+ CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
if (!IS_ERR(req)) {
req->r_dentry = dget(dentry);
- req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2;
+ req->r_num_caps = 2;
+ req->r_parent = dir;
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 180bbef760f2..e8f11fa565c5 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -207,7 +207,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
req->r_inode = d_inode(child);
ihold(d_inode(child));
req->r_ino2 = ceph_vino(d_inode(parent));
- req->r_locked_dir = d_inode(parent);
+ req->r_parent = d_inode(parent);
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 045d30d26624..26cc95421cca 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file)
spin_lock(&ci->i_ceph_lock);
if (__ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
- int mds_wanted = __ceph_caps_mds_wanted(ci);
+ int mds_wanted = __ceph_caps_mds_wanted(ci, true);
int issued = __ceph_caps_issued(ci, NULL);
dout("open %p fmode %d want %s issued %s using existing\n",
@@ -379,7 +379,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.open.mask = cpu_to_le32(mask);
- req->r_locked_dir = dir; /* caller holds dir->i_mutex */
+ req->r_parent = dir;
+ set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
@@ -758,9 +759,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
goto out;
}
- req->r_flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
@@ -794,89 +793,6 @@ out:
kfree(aio_work);
}
-/*
- * Write commit request unsafe callback, called to tell us when a
- * request is unsafe (that is, in flight--has been handed to the
- * messenger to send to its target osd). It is called again when
- * we've received a response message indicating the request is
- * "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
- * is completed early (and unsuccessfully) due to a timeout or
- * interrupt.
- *
- * This is used if we requested both an ACK and ONDISK commit reply
- * from the OSD.
- */
-static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
-{
- struct ceph_inode_info *ci = ceph_inode(req->r_inode);
-
- dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
- unsafe ? "un" : "");
- if (unsafe) {
- ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
- spin_lock(&ci->i_unsafe_lock);
- list_add_tail(&req->r_unsafe_item,
- &ci->i_unsafe_writes);
- spin_unlock(&ci->i_unsafe_lock);
-
- complete_all(&req->r_completion);
- } else {
- spin_lock(&ci->i_unsafe_lock);
- list_del_init(&req->r_unsafe_item);
- spin_unlock(&ci->i_unsafe_lock);
- ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
- }
-}
-
-/*
- * Wait on any unsafe replies for the given inode. First wait on the
- * newest request, and make that the upper bound. Then, if there are
- * more requests, keep waiting on the oldest as long as it is still older
- * than the original request.
- */
-void ceph_sync_write_wait(struct inode *inode)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- struct list_head *head = &ci->i_unsafe_writes;
- struct ceph_osd_request *req;
- u64 last_tid;
-
- if (!S_ISREG(inode->i_mode))
- return;
-
- spin_lock(&ci->i_unsafe_lock);
- if (list_empty(head))
- goto out;
-
- /* set upper bound as _last_ entry in chain */
-
- req = list_last_entry(head, struct ceph_osd_request,
- r_unsafe_item);
- last_tid = req->r_tid;
-
- do {
- ceph_osdc_get_request(req);
- spin_unlock(&ci->i_unsafe_lock);
-
- dout("sync_write_wait on tid %llu (until %llu)\n",
- req->r_tid, last_tid);
- wait_for_completion(&req->r_done_completion);
- ceph_osdc_put_request(req);
-
- spin_lock(&ci->i_unsafe_lock);
- /*
- * from here on look at first entry in chain, since we
- * only want to wait for anything older than last_tid
- */
- if (list_empty(head))
- break;
- req = list_first_entry(head, struct ceph_osd_request,
- r_unsafe_item);
- } while (req->r_tid < last_tid);
-out:
- spin_unlock(&ci->i_unsafe_lock);
-}
-
static ssize_t
ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct ceph_snap_context *snapc,
@@ -915,9 +831,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (ret2 < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret2);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
} else {
flags = CEPH_OSD_FLAG_READ;
}
@@ -1116,10 +1030,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
if (ret < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ACK;
+ flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
while ((len = iov_iter_count(from)) > 0) {
size_t left;
@@ -1165,8 +1076,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
goto out;
}
- /* get a second commit callback */
- req->r_unsafe_callback = ceph_sync_write_unsafe;
req->r_inode = inode;
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
@@ -1616,8 +1525,7 @@ static int ceph_zero_partial_object(struct inode *inode,
ceph_vino(inode),
offset, length,
0, 1, op,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
+ CEPH_OSD_FLAG_WRITE,
NULL, 0, 0, false);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5e659d054b40..fd8f771f99b7 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_rdcache_gen = 0;
ci->i_rdcache_revoking = 0;
- INIT_LIST_HEAD(&ci->i_unsafe_writes);
INIT_LIST_HEAD(&ci->i_unsafe_dirops);
INIT_LIST_HEAD(&ci->i_unsafe_iops);
spin_lock_init(&ci->i_unsafe_lock);
@@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode)
return 1;
}
-void ceph_evict_inode(struct inode *inode)
-{
- /* wait unsafe sync writes */
- ceph_sync_write_wait(inode);
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
-}
-
static inline blkcnt_t calc_inode_blocks(u64 size)
{
return (size + (1<<9) - 1) >> 9;
@@ -1016,7 +1007,9 @@ out:
static void update_dentry_lease(struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
struct ceph_mds_session *session,
- unsigned long from_time)
+ unsigned long from_time,
+ struct ceph_vino *tgt_vino,
+ struct ceph_vino *dir_vino)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
long unsigned duration = le32_to_cpu(lease->duration_ms);
@@ -1024,13 +1017,27 @@ static void update_dentry_lease(struct dentry *dentry,
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
struct inode *dir;
+ /*
+ * Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
+ * we expect a negative dentry.
+ */
+ if (!tgt_vino && d_really_is_positive(dentry))
+ return;
+
+ if (tgt_vino && (d_really_is_negative(dentry) ||
+ !ceph_ino_compare(d_inode(dentry), tgt_vino)))
+ return;
+
spin_lock(&dentry->d_lock);
dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
dentry, duration, ttl);
- /* make lease_rdcache_gen match directory */
dir = d_inode(dentry->d_parent);
+ /* make sure parent matches dir_vino */
+ if (!ceph_ino_compare(dir, dir_vino))
+ goto out_unlock;
+
/* only track leases on regular dentries */
if (ceph_snap(dir) != CEPH_NOSNAP)
goto out_unlock;
@@ -1108,61 +1115,27 @@ out:
*
* Called with snap_rwsem (read).
*/
-int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
- struct ceph_mds_session *session)
+int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
{
+ struct ceph_mds_session *session = req->r_session;
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct inode *in = NULL;
- struct ceph_vino vino;
+ struct ceph_vino tvino, dvino;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
dout("fill_trace %p is_dentry %d is_target %d\n", req,
rinfo->head->is_dentry, rinfo->head->is_target);
-#if 0
- /*
- * Debugging hook:
- *
- * If we resend completed ops to a recovering mds, we get no
- * trace. Since that is very rare, pretend this is the case
- * to ensure the 'no trace' handlers in the callers behave.
- *
- * Fill in inodes unconditionally to avoid breaking cap
- * invariants.
- */
- if (rinfo->head->op & CEPH_MDS_OP_WRITE) {
- pr_info("fill_trace faking empty trace on %lld %s\n",
- req->r_tid, ceph_mds_op_name(rinfo->head->op));
- if (rinfo->head->is_dentry) {
- rinfo->head->is_dentry = 0;
- err = fill_inode(req->r_locked_dir,
- &rinfo->diri, rinfo->dirfrag,
- session, req->r_request_started, -1);
- }
- if (rinfo->head->is_target) {
- rinfo->head->is_target = 0;
- ininfo = rinfo->targeti.in;
- vino.ino = le64_to_cpu(ininfo->ino);
- vino.snap = le64_to_cpu(ininfo->snapid);
- in = ceph_get_inode(sb, vino);
- err = fill_inode(in, &rinfo->targeti, NULL,
- session, req->r_request_started,
- req->r_fmode);
- iput(in);
- }
- }
-#endif
-
if (!rinfo->head->is_target && !rinfo->head->is_dentry) {
dout("fill_trace reply is empty!\n");
- if (rinfo->head->result == 0 && req->r_locked_dir)
+ if (rinfo->head->result == 0 && req->r_parent)
ceph_invalidate_dir_request(req);
return 0;
}
if (rinfo->head->is_dentry) {
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
if (dir) {
err = fill_inode(dir, NULL,
@@ -1188,8 +1161,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
dname.name = rinfo->dname;
dname.len = rinfo->dname_len;
dname.hash = full_name_hash(parent, dname.name, dname.len);
- vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
retry_lookup:
dn = d_lookup(parent, &dname);
dout("d_lookup on parent=%p name=%.*s got %p\n",
@@ -1206,8 +1179,8 @@ retry_lookup:
}
err = 0;
} else if (d_really_is_positive(dn) &&
- (ceph_ino(d_inode(dn)) != vino.ino ||
- ceph_snap(d_inode(dn)) != vino.snap)) {
+ (ceph_ino(d_inode(dn)) != tvino.ino ||
+ ceph_snap(d_inode(dn)) != tvino.snap)) {
dout(" dn %p points to wrong inode %p\n",
dn, d_inode(dn));
d_delete(dn);
@@ -1221,10 +1194,10 @@ retry_lookup:
}
if (rinfo->head->is_target) {
- vino.ino = le64_to_cpu(rinfo->targeti.in->ino);
- vino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
- in = ceph_get_inode(sb, vino);
+ in = ceph_get_inode(sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
goto done;
@@ -1233,8 +1206,8 @@ retry_lookup:
err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
session, req->r_request_started,
- (!req->r_aborted && rinfo->head->result == 0) ?
- req->r_fmode : -1,
+ (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+ rinfo->head->result == 0) ? req->r_fmode : -1,
&req->r_caps_reservation);
if (err < 0) {
pr_err("fill_inode badness %p %llx.%llx\n",
@@ -1247,8 +1220,9 @@ retry_lookup:
* ignore null lease/binding on snapdir ENOENT, or else we
* will have trouble splicing in the virtual snapdir later
*/
- if (rinfo->head->is_dentry && !req->r_aborted &&
- req->r_locked_dir &&
+ if (rinfo->head->is_dentry &&
+ !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
+ test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
fsc->mount_options->snapdir_name,
req->r_dentry->d_name.len))) {
@@ -1257,17 +1231,19 @@ retry_lookup:
* mknod symlink mkdir : null -> new inode
* unlink : linked -> null
*/
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
struct dentry *dn = req->r_dentry;
bool have_dir_cap, have_lease;
BUG_ON(!dn);
BUG_ON(!dir);
BUG_ON(d_inode(dn->d_parent) != dir);
- BUG_ON(ceph_ino(dir) !=
- le64_to_cpu(rinfo->diri.in->ino));
- BUG_ON(ceph_snap(dir) !=
- le64_to_cpu(rinfo->diri.in->snapid));
+
+ dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
+ dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
+
+ BUG_ON(ceph_ino(dir) != dvino.ino);
+ BUG_ON(ceph_snap(dir) != dvino.snap);
/* do we have a lease on the whole dir? */
have_dir_cap =
@@ -1319,12 +1295,13 @@ retry_lookup:
ceph_dir_clear_ordered(dir);
dout("d_delete %p\n", dn);
d_delete(dn);
- } else {
- if (have_lease && d_unhashed(dn))
+ } else if (have_lease) {
+ if (d_unhashed(dn))
d_add(dn, NULL);
update_dentry_lease(dn, rinfo->dlease,
session,
- req->r_request_started);
+ req->r_request_started,
+ NULL, &dvino);
}
goto done;
}
@@ -1347,15 +1324,19 @@ retry_lookup:
have_lease = false;
}
- if (have_lease)
+ if (have_lease) {
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
update_dentry_lease(dn, rinfo->dlease, session,
- req->r_request_started);
+ req->r_request_started,
+ &tvino, &dvino);
+ }
dout(" final dn %p\n", dn);
- } else if (!req->r_aborted &&
- (req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
- req->r_op == CEPH_MDS_OP_MKSNAP)) {
+ } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
+ req->r_op == CEPH_MDS_OP_MKSNAP) &&
+ !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
struct dentry *dn = req->r_dentry;
- struct inode *dir = req->r_locked_dir;
+ struct inode *dir = req->r_parent;
/* fill out a snapdir LOOKUPSNAP dentry */
BUG_ON(!dn);
@@ -1370,6 +1351,26 @@ retry_lookup:
goto done;
}
req->r_dentry = dn; /* may have spliced */
+ } else if (rinfo->head->is_dentry) {
+ struct ceph_vino *ptvino = NULL;
+
+ if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
+ le32_to_cpu(rinfo->dlease->duration_ms)) {
+ dvino.ino = le64_to_cpu(rinfo->diri.in->ino);
+ dvino.snap = le64_to_cpu(rinfo->diri.in->snapid);
+
+ if (rinfo->head->is_target) {
+ tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
+ tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
+ ptvino = &tvino;
+ }
+
+ update_dentry_lease(req->r_dentry, rinfo->dlease,
+ session, req->r_request_started, ptvino,
+ &dvino);
+ } else {
+ dout("%s: no dentry lease or dir cap\n", __func__);
+ }
}
done:
dout("fill_trace done err=%d\n", err);
@@ -1478,7 +1479,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {};
- if (req->r_aborted)
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
return readdir_prepopulate_inodes_only(req, session);
if (rinfo->hash_order && req->r_path2) {
@@ -1523,14 +1524,14 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
/* FIXME: release caps/leases if error occurs */
for (i = 0; i < rinfo->dir_nr; i++) {
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
- struct ceph_vino vino;
+ struct ceph_vino tvino, dvino;
dname.name = rde->name;
dname.len = rde->name_len;
dname.hash = full_name_hash(parent, dname.name, dname.len);
- vino.ino = le64_to_cpu(rde->inode.in->ino);
- vino.snap = le64_to_cpu(rde->inode.in->snapid);
+ tvino.ino = le64_to_cpu(rde->inode.in->ino);
+ tvino.snap = le64_to_cpu(rde->inode.in->snapid);
if (rinfo->hash_order) {
u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
@@ -1559,8 +1560,8 @@ retry_lookup:
goto out;
}
} else if (d_really_is_positive(dn) &&
- (ceph_ino(d_inode(dn)) != vino.ino ||
- ceph_snap(d_inode(dn)) != vino.snap)) {
+ (ceph_ino(d_inode(dn)) != tvino.ino ||
+ ceph_snap(d_inode(dn)) != tvino.snap)) {
dout(" dn %p points to wrong inode %p\n",
dn, d_inode(dn));
d_delete(dn);
@@ -1572,7 +1573,7 @@ retry_lookup:
if (d_really_is_positive(dn)) {
in = d_inode(dn);
} else {
- in = ceph_get_inode(parent->d_sb, vino);
+ in = ceph_get_inode(parent->d_sb, tvino);
if (IS_ERR(in)) {
dout("new_inode badness\n");
d_drop(dn);
@@ -1617,8 +1618,9 @@ retry_lookup:
ceph_dentry(dn)->offset = rde->offset;
+ dvino = ceph_vino(d_inode(parent));
update_dentry_lease(dn, rde->lease, req->r_session,
- req->r_request_started);
+ req->r_request_started, &tvino, &dvino);
if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
ret = fill_readdir_cache(d_inode(parent), dn,
@@ -1632,7 +1634,7 @@ next_item:
}
out:
if (err == 0 && skipped == 0) {
- req->r_did_prepopulate = true;
+ set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags);
req->r_readdir_cache_idx = cache_ctl.index;
}
ceph_readdir_cache_release(&cache_ctl);
@@ -1720,7 +1722,7 @@ static void ceph_invalidate_work(struct work_struct *work)
mutex_lock(&ci->i_truncate_mutex);
- if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 7d752d53353a..4c9c72f26eb9 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -25,7 +25,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
l.stripe_count = ci->i_layout.stripe_count;
l.object_size = ci->i_layout.object_size;
l.data_pool = ci->i_layout.pool_id;
- l.preferred_osd = (s32)-1;
+ l.preferred_osd = -1;
if (copy_to_user(arg, &l, sizeof(l)))
return -EFAULT;
}
@@ -97,7 +97,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
nl.data_pool = ci->i_layout.pool_id;
/* this is obsolete, and always -1 */
- nl.preferred_osd = le64_to_cpu(-1);
+ nl.preferred_osd = -1;
err = __validate_layout(mdsc, &nl);
if (err)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c9d2e553a6c4..c681762d76e6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -547,8 +547,8 @@ void ceph_mdsc_release_request(struct kref *kref)
ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
iput(req->r_inode);
}
- if (req->r_locked_dir)
- ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+ if (req->r_parent)
+ ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
@@ -628,6 +628,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
{
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+ /* Never leave an unregistered request on an unsafe list! */
+ list_del_init(&req->r_unsafe_item);
+
if (req->r_tid == mdsc->oldest_tid) {
struct rb_node *p = rb_next(&req->r_node);
mdsc->oldest_tid = 0;
@@ -644,13 +647,15 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
erase_request(&mdsc->request_tree, req);
- if (req->r_unsafe_dir && req->r_got_unsafe) {
+ if (req->r_unsafe_dir &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_dir_item);
spin_unlock(&ci->i_unsafe_lock);
}
- if (req->r_target_inode && req->r_got_unsafe) {
+ if (req->r_target_inode &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_target_item);
@@ -668,6 +673,28 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
}
/*
+ * Walk back up the dentry tree until we hit a dentry representing a
+ * non-snapshot inode. We do this using the rcu_read_lock (which must be held
+ * when calling this) to ensure that the objects won't disappear while we're
+ * working with them. Once we hit a candidate dentry, we attempt to take a
+ * reference to it, and return that as the result.
+ */
+static struct inode *get_nonsnap_parent(struct dentry *dentry)
+{
+ struct inode *inode = NULL;
+
+ while (dentry && !IS_ROOT(dentry)) {
+ inode = d_inode_rcu(dentry);
+ if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
+ break;
+ dentry = dentry->d_parent;
+ }
+ if (inode)
+ inode = igrab(inode);
+ return inode;
+}
+
+/*
* Choose mds to send request to next. If there is a hint set in the
* request (e.g., due to a prior forward hint from the mds), use that.
* Otherwise, consult frag tree and/or caps to identify the
@@ -675,19 +702,6 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*
* Called under mdsc->mutex.
*/
-static struct dentry *get_nonsnap_parent(struct dentry *dentry)
-{
- /*
- * we don't need to worry about protecting the d_parent access
- * here because we never renaming inside the snapped namespace
- * except to resplice to another snapdir, and either the old or new
- * result is a valid result.
- */
- while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
- dentry = dentry->d_parent;
- return dentry;
-}
-
static int __choose_mds(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req)
{
@@ -697,7 +711,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
int mode = req->r_direct_mode;
int mds = -1;
u32 hash = req->r_direct_hash;
- bool is_hash = req->r_direct_is_hash;
+ bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
/*
* is there a specific mds we should try? ignore hint if we have
@@ -717,30 +731,39 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode = NULL;
if (req->r_inode) {
inode = req->r_inode;
+ ihold(inode);
} else if (req->r_dentry) {
/* ignore race with rename; old or new d_parent is okay */
- struct dentry *parent = req->r_dentry->d_parent;
- struct inode *dir = d_inode(parent);
+ struct dentry *parent;
+ struct inode *dir;
+
+ rcu_read_lock();
+ parent = req->r_dentry->d_parent;
+ dir = req->r_parent ? : d_inode_rcu(parent);
- if (dir->i_sb != mdsc->fsc->sb) {
- /* not this fs! */
+ if (!dir || dir->i_sb != mdsc->fsc->sb) {
+ /* not this fs or parent went negative */
inode = d_inode(req->r_dentry);
+ if (inode)
+ ihold(inode);
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
/* direct snapped/virtual snapdir requests
* based on parent dir inode */
- struct dentry *dn = get_nonsnap_parent(parent);
- inode = d_inode(dn);
+ inode = get_nonsnap_parent(parent);
dout("__choose_mds using nonsnap parent %p\n", inode);
} else {
/* dentry target */
inode = d_inode(req->r_dentry);
if (!inode || mode == USE_AUTH_MDS) {
/* dir + name */
- inode = dir;
+ inode = igrab(dir);
hash = ceph_dentry_hash(dir, req->r_dentry);
is_hash = true;
+ } else {
+ ihold(inode);
}
}
+ rcu_read_unlock();
}
dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash,
@@ -769,7 +792,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
(int)r, frag.ndist);
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
CEPH_MDS_STATE_ACTIVE)
- return mds;
+ goto out;
}
/* since this file/dir wasn't known to be
@@ -784,7 +807,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), frag.frag, mds);
if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
CEPH_MDS_STATE_ACTIVE)
- return mds;
+ goto out;
}
}
}
@@ -797,6 +820,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
+ iput(inode);
goto random;
}
mds = cap->session->s_mds;
@@ -804,6 +828,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
inode, ceph_vinop(inode), mds,
cap == ci->i_auth_cap ? "auth " : "", cap);
spin_unlock(&ci->i_ceph_lock);
+out:
+ iput(inode);
return mds;
random:
@@ -1036,7 +1062,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
while (!list_empty(&session->s_unsafe)) {
req = list_first_entry(&session->s_unsafe,
struct ceph_mds_request, r_unsafe_item);
- list_del_init(&req->r_unsafe_item);
pr_warn_ratelimited(" dropping unsafe request %llu\n",
req->r_tid);
__unregister_request(mdsc, req);
@@ -1146,7 +1171,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
if (ci->i_wrbuffer_ref > 0 &&
- ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
invalidate = true;
while (!list_empty(&ci->i_cap_flush_list)) {
@@ -1775,18 +1800,23 @@ retry:
return path;
}
-static int build_dentry_path(struct dentry *dentry,
+static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino,
int *pfreepath)
{
char *path;
- if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
- *pino = ceph_ino(d_inode(dentry->d_parent));
+ rcu_read_lock();
+ if (!dir)
+ dir = d_inode_rcu(dentry->d_parent);
+ if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
+ *pino = ceph_ino(dir);
+ rcu_read_unlock();
*ppath = dentry->d_name.name;
*ppathlen = dentry->d_name.len;
return 0;
}
+ rcu_read_unlock();
path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -1822,8 +1852,8 @@ static int build_inode_path(struct inode *inode,
* an explicit ino+path.
*/
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
- const char *rpath, u64 rino,
- const char **ppath, int *pathlen,
+ struct inode *rdiri, const char *rpath,
+ u64 rino, const char **ppath, int *pathlen,
u64 *ino, int *freepath)
{
int r = 0;
@@ -1833,7 +1863,8 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
ceph_snap(rinode));
} else if (rdentry) {
- r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
+ r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
+ freepath);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
} else if (rpath || rino) {
@@ -1866,7 +1897,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
int ret;
ret = set_request_path_attr(req->r_inode, req->r_dentry,
- req->r_path1, req->r_ino1.ino,
+ req->r_parent, req->r_path1, req->r_ino1.ino,
&path1, &pathlen1, &ino1, &freepath1);
if (ret < 0) {
msg = ERR_PTR(ret);
@@ -1874,6 +1905,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
}
ret = set_request_path_attr(NULL, req->r_old_dentry,
+ req->r_old_dentry_dir,
req->r_path2, req->r_ino2.ino,
&path2, &pathlen2, &ino2, &freepath2);
if (ret < 0) {
@@ -1927,10 +1959,13 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
mds, req->r_inode_drop, req->r_inode_unless, 0);
if (req->r_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_dentry,
- mds, req->r_dentry_drop, req->r_dentry_unless);
+ req->r_parent, mds, req->r_dentry_drop,
+ req->r_dentry_unless);
if (req->r_old_dentry_drop)
releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
- mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
+ req->r_old_dentry_dir, mds,
+ req->r_old_dentry_drop,
+ req->r_old_dentry_unless);
if (req->r_old_inode_drop)
releases += ceph_encode_inode_release(&p,
d_inode(req->r_old_dentry),
@@ -2012,7 +2047,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
- if (req->r_got_unsafe) {
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
void *p;
/*
* Replay. Do not regenerate message (and rebuild
@@ -2061,16 +2096,16 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
rhead = msg->front.iov_base;
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY;
- if (req->r_locked_dir)
+ if (req->r_parent)
flags |= CEPH_MDS_FLAG_WANT_DENTRY;
rhead->flags = cpu_to_le32(flags);
rhead->num_fwd = req->r_num_fwd;
rhead->num_retry = req->r_attempts - 1;
rhead->ino = 0;
- dout(" r_locked_dir = %p\n", req->r_locked_dir);
+ dout(" r_parent = %p\n", req->r_parent);
return 0;
}
@@ -2084,8 +2119,8 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = 0;
- if (req->r_err || req->r_got_result) {
- if (req->r_aborted)
+ if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
__unregister_request(mdsc, req);
goto out;
}
@@ -2096,12 +2131,12 @@ static int __do_request(struct ceph_mds_client *mdsc,
err = -EIO;
goto finish;
}
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
dout("do_request forced umount\n");
err = -EIO;
goto finish;
}
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
if (mdsc->mdsmap_err) {
err = mdsc->mdsmap_err;
dout("do_request mdsmap err %d\n", err);
@@ -2215,7 +2250,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
while (p) {
req = rb_entry(p, struct ceph_mds_request, r_node);
p = rb_next(p);
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
continue;
if (req->r_attempts > 0)
continue; /* only new requests */
@@ -2250,11 +2285,11 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
dout("do_request on %p\n", req);
- /* take CAP_PIN refs for r_inode, r_locked_dir, r_old_dentry */
+ /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
if (req->r_inode)
ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
- if (req->r_locked_dir)
- ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
+ if (req->r_parent)
+ ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
if (req->r_old_dentry_dir)
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
@@ -2289,7 +2324,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
/* only abort if we didn't race with a real reply */
- if (req->r_got_result) {
+ if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
err = le32_to_cpu(req->r_reply_info.head->result);
} else if (err < 0) {
dout("aborted request %lld with %d\n", req->r_tid, err);
@@ -2301,10 +2336,10 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
*/
mutex_lock(&req->r_fill_mutex);
req->r_err = err;
- req->r_aborted = true;
+ set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
mutex_unlock(&req->r_fill_mutex);
- if (req->r_locked_dir &&
+ if (req->r_parent &&
(req->r_op & CEPH_MDS_OP_WRITE))
ceph_invalidate_dir_request(req);
} else {
@@ -2323,7 +2358,7 @@ out:
*/
void ceph_invalidate_dir_request(struct ceph_mds_request *req)
{
- struct inode *inode = req->r_locked_dir;
+ struct inode *inode = req->r_parent;
dout("invalidate_dir_request %p (complete, lease(s))\n", inode);
@@ -2379,14 +2414,14 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
}
/* dup? */
- if ((req->r_got_unsafe && !head->safe) ||
- (req->r_got_safe && head->safe)) {
+ if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) ||
+ (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) {
pr_warn("got a dup %s reply on %llu from mds%d\n",
head->safe ? "safe" : "unsafe", tid, mds);
mutex_unlock(&mdsc->mutex);
goto out;
}
- if (req->r_got_safe) {
+ if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) {
pr_warn("got unsafe after safe on %llu from mds%d\n",
tid, mds);
mutex_unlock(&mdsc->mutex);
@@ -2425,10 +2460,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
- req->r_got_safe = true;
+ set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
__unregister_request(mdsc, req);
- if (req->r_got_unsafe) {
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
/*
* We already handled the unsafe response, now do the
* cleanup. No need to examine the response; the MDS
@@ -2437,7 +2472,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
* useful we could do with a revised return value.
*/
dout("got safe reply %llu, mds%d\n", tid, mds);
- list_del_init(&req->r_unsafe_item);
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
@@ -2446,7 +2480,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
goto out;
}
} else {
- req->r_got_unsafe = true;
+ set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags);
list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
if (req->r_unsafe_dir) {
struct ceph_inode_info *ci =
@@ -2486,7 +2520,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* insert trace into our cache */
mutex_lock(&req->r_fill_mutex);
current->journal_info = req;
- err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
+ err = ceph_fill_trace(mdsc->fsc->sb, req);
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
req->r_op == CEPH_MDS_OP_LSSNAP))
@@ -2500,7 +2534,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (realm)
ceph_put_snap_realm(mdsc, realm);
- if (err == 0 && req->r_got_unsafe && req->r_target_inode) {
+ if (err == 0 && req->r_target_inode &&
+ test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
spin_lock(&ci->i_unsafe_lock);
list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops);
@@ -2508,12 +2543,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
}
out_err:
mutex_lock(&mdsc->mutex);
- if (!req->r_aborted) {
+ if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
if (err) {
req->r_err = err;
} else {
req->r_reply = ceph_msg_get(msg);
- req->r_got_result = true;
+ set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags);
}
} else {
dout("reply arrived after request %lld was aborted\n", tid);
@@ -2557,7 +2592,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
goto out; /* dup reply? */
}
- if (req->r_aborted) {
+ if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
dout("forward tid %llu aborted, unregistering\n", tid);
__unregister_request(mdsc, req);
} else if (fwd_seq <= req->r_num_fwd) {
@@ -2567,7 +2602,7 @@ static void handle_forward(struct ceph_mds_client *mdsc,
/* resend. forward race not possible; mds would drop */
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
BUG_ON(req->r_err);
- BUG_ON(req->r_got_result);
+ BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags));
req->r_attempts = 0;
req->r_num_fwd = fwd_seq;
req->r_resend_mds = next_mds;
@@ -2732,7 +2767,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
while (p) {
req = rb_entry(p, struct ceph_mds_request, r_node);
p = rb_next(p);
- if (req->r_got_unsafe)
+ if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
continue;
if (req->r_attempts == 0)
continue; /* only old requests */
@@ -3556,7 +3591,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
u64 want_tid, want_flush;
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
@@ -3587,7 +3622,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
*/
static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
{
- if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+ if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return true;
return atomic_read(&mdsc->num_sessions) <= skipped;
}
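
The ceph hunks above replace the per-request booleans (r_got_unsafe, r_got_safe, r_got_result, r_aborted, and friends) with bits in a single r_req_flags word manipulated through set_bit()/test_bit(). A minimal, self-contained sketch of that pattern follows; struct demo_request and the DEMO_* bit numbers are illustrative only and are not part of this patch.

/* Minimal sketch of the bool-to-bit-flag conversion used above. */
#include <linux/bitops.h>
#include <linux/types.h>

#define DEMO_R_ABORTED		2	/* call was aborted */
#define DEMO_R_GOT_UNSAFE	3	/* got an unsafe reply */

struct demo_request {
	unsigned long r_req_flags;	/* replaces several bool fields */
};

static void demo_mark_unsafe(struct demo_request *req)
{
	/* set_bit() is atomic on the flags word; no extra locking needed */
	set_bit(DEMO_R_GOT_UNSAFE, &req->r_req_flags);
}

static bool demo_is_aborted(struct demo_request *req)
{
	return test_bit(DEMO_R_ABORTED, &req->r_req_flags);
}

Packing the states into one word keeps the request structure smaller and lets the flags be set and tested atomically.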
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3c6f77b7bb02..ac0475a2daa7 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -202,9 +202,18 @@ struct ceph_mds_request {
char *r_path1, *r_path2;
struct ceph_vino r_ino1, r_ino2;
- struct inode *r_locked_dir; /* dir (if any) i_mutex locked by vfs */
+ struct inode *r_parent; /* parent dir inode */
struct inode *r_target_inode; /* resulting inode */
+#define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */
+#define CEPH_MDS_R_ABORTED (2) /* call was aborted */
+#define CEPH_MDS_R_GOT_UNSAFE (3) /* got an unsafe reply */
+#define CEPH_MDS_R_GOT_SAFE (4) /* got a safe reply */
+#define CEPH_MDS_R_GOT_RESULT (5) /* got a result */
+#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */
+#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */
+ unsigned long r_req_flags;
+
struct mutex r_fill_mutex;
union ceph_mds_request_args r_args;
@@ -216,7 +225,6 @@ struct ceph_mds_request {
/* for choosing which mds to send this request to */
int r_direct_mode;
u32 r_direct_hash; /* choose dir frag based on this dentry hash */
- bool r_direct_is_hash; /* true if r_direct_hash is valid */
/* data payload is used for xattr ops */
struct ceph_pagelist *r_pagelist;
@@ -234,7 +242,6 @@ struct ceph_mds_request {
struct ceph_mds_reply_info_parsed r_reply_info;
struct page *r_locked_page;
int r_err;
- bool r_aborted;
unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
unsigned long r_started; /* start time to measure timeout against */
@@ -262,9 +269,7 @@ struct ceph_mds_request {
ceph_mds_request_callback_t r_callback;
ceph_mds_request_wait_callback_t r_wait_for_completion;
struct list_head r_unsafe_item; /* per-session unsafe list item */
- bool r_got_unsafe, r_got_safe, r_got_result;
- bool r_did_prepopulate;
long long r_dir_release_cnt;
long long r_dir_ordered_cnt;
int r_readdir_cache_idx;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6bd20d707bfd..0ec8d0114e57 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -757,7 +757,6 @@ static const struct super_operations ceph_super_ops = {
.destroy_inode = ceph_destroy_inode,
.write_inode = ceph_write_inode,
.drop_inode = ceph_drop_inode,
- .evict_inode = ceph_evict_inode,
.sync_fs = ceph_sync_fs,
.put_super = ceph_put_super,
.show_options = ceph_show_options,
@@ -952,6 +951,14 @@ static int ceph_register_bdi(struct super_block *sb,
fsc->backing_dev_info.ra_pages =
VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+ if (fsc->mount_options->rsize > fsc->mount_options->rasize &&
+ fsc->mount_options->rsize >= PAGE_SIZE)
+ fsc->backing_dev_info.io_pages =
+ (fsc->mount_options->rsize + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
+ else if (fsc->mount_options->rsize == 0)
+ fsc->backing_dev_info.io_pages = ULONG_MAX;
+
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
atomic_long_inc_return(&bdi_seq));
if (!err)
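
The ceph_register_bdi() hunk sizes bdi->io_pages from the rsize mount option. With the 64 MiB CEPH_RSIZE_DEFAULT introduced in the super.h hunk below and 4 KiB pages, the rounding works out to (67108864 + 4095) >> 12 = 16384 pages, while rsize=0 removes the cap. A rough sketch of the same computation; demo_set_io_pages is illustrative, not part of the patch.

/* Rounding rsize up to whole pages for the bdi I/O size cap (sketch). */
#include <linux/kernel.h>
#include <linux/mm.h>

static void demo_set_io_pages(unsigned long *io_pages,
			      unsigned long rsize, unsigned long rasize)
{
	if (rsize > rasize && rsize >= PAGE_SIZE)
		*io_pages = (rsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
	else if (rsize == 0)
		*io_pages = ULONG_MAX;	/* rsize=0 means "unlimited" */
}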
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3373b61faefd..e9410bcf4113 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -45,8 +45,8 @@
#define ceph_test_mount_opt(fsc, opt) \
(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
-#define CEPH_RSIZE_DEFAULT 0 /* max read size */
-#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */
+#define CEPH_RSIZE_DEFAULT (64*1024*1024) /* max read size */
+#define CEPH_RASIZE_DEFAULT (8192*1024) /* max readahead */
#define CEPH_MAX_READDIR_DEFAULT 1024
#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
@@ -343,7 +343,6 @@ struct ceph_inode_info {
u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
- struct list_head i_unsafe_writes; /* uncommitted sync writes */
struct list_head i_unsafe_dirops; /* uncommitted mds dir ops */
struct list_head i_unsafe_iops; /* uncommitted mds inode ops */
spinlock_t i_unsafe_lock;
@@ -602,7 +601,7 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci)
}
/* what the mds thinks we want */
-extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci);
+extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
extern void ceph_caps_init(struct ceph_mds_client *mdsc);
extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
@@ -753,7 +752,6 @@ extern const struct inode_operations ceph_file_iops;
extern struct inode *ceph_alloc_inode(struct super_block *sb);
extern void ceph_destroy_inode(struct inode *inode);
extern int ceph_drop_inode(struct inode *inode);
-extern void ceph_evict_inode(struct inode *inode);
extern struct inode *ceph_get_inode(struct super_block *sb,
struct ceph_vino vino);
@@ -764,8 +762,7 @@ extern void ceph_fill_file_time(struct inode *inode, int issued,
u64 time_warp_seq, struct timespec *ctime,
struct timespec *mtime, struct timespec *atime);
extern int ceph_fill_trace(struct super_block *sb,
- struct ceph_mds_request *req,
- struct ceph_mds_session *session);
+ struct ceph_mds_request *req);
extern int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct ceph_mds_session *session);
@@ -904,6 +901,7 @@ extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
extern int ceph_encode_inode_release(void **p, struct inode *inode,
int mds, int drop, int unless, int force);
extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
+ struct inode *dir,
int mds, int drop, int unless);
extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
@@ -933,7 +931,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
extern int ceph_release(struct inode *inode, struct file *filp);
extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
char *data, size_t len);
-extern void ceph_sync_write_wait(struct inode *inode);
+
/* dir.c */
extern const struct file_operations ceph_dir_fops;
extern const struct file_operations ceph_snapdir_fops;
diff --git a/fs/dax.c b/fs/dax.c
index 5ae8b71ebadc..7436c98b92c8 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1436,7 +1436,8 @@ out:
return result;
}
#else
-static int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops)
+static int dax_iomap_pmd_fault(struct vm_fault *vmf,
+ const struct iomap_ops *ops)
{
return VM_FAULT_FALLBACK;
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index c87bae4376b8..a04ebea77de8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -587,7 +587,7 @@ static int dio_set_defer_completion(struct dio *dio)
/*
* Call into the fs to map some more disk blocks. We record the current number
* of available blocks at sdio->blocks_available. These are in units of the
- * fs blocksize, (1 << inode->i_blkbits).
+ * fs blocksize, i_blocksize(inode).
*
* The fs is allowed to map lots of blocks at once. If it wants to do that,
* it uses the passed inode-relative block number as the file offset, as usual.
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 866bb18efefe..e00d45af84ea 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -123,7 +123,7 @@ void ecryptfs_destroy_kthread(void)
* @lower_dentry: Lower dentry for file to open
* @lower_mnt: Lower vfsmount for file to open
*
- * This function gets a r/w file opened againt the lower dentry.
+ * This function gets a r/w file opened against the lower dentry.
*
* Returns zero on success; non-zero otherwise
*/
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index bcb68fcc8445..5ec16313da1a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1895,7 +1895,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
* Also, we do not currently supported nested exclusive wakeups.
*/
- if (epds.events & EPOLLEXCLUSIVE) {
+ if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) {
if (op == EPOLL_CTL_MOD)
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
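
The added guard matters because EPOLL_CTL_DEL ignores its event argument, so epds is left uninitialized for that op and checking EPOLLEXCLUSIVE unconditionally read garbage. For context, ep_op_has_event() in eventpoll.c is roughly the helper below (reproduced here, not added by this patch).

/* EPOLL_CTL_DEL is the only ctl op that carries no event payload. */
static inline int ep_op_has_event(int op)
{
	return op != EPOLL_CTL_DEL;
}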
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 37e059202cd2..e7f12a204cbc 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -84,7 +84,7 @@
* -- writeout
* Writeout looks up whole page cache to see if a buffer is
* mapped, If there are not very many delayed buffers, then it is
- * time comsuming.
+ * time consuming.
*
* With extent status tree implementation, FIEMAP, SEEK_HOLE/DATA,
* bigalloc and writeout can figure out if a block or a range of
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 41d8e53e5a7f..971f66342080 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2221,7 +2221,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
{
struct inode *inode = mpd->inode;
int err;
- ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+ ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
>> inode->i_blkbits;
do {
@@ -3577,7 +3577,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
if (overwrite)
get_block_func = ext4_dio_get_block_overwrite;
else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
- round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
+ round_down(offset, i_blocksize(inode)) >= inode->i_size) {
get_block_func = ext4_dio_get_block;
dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
} else if (is_sync_kiocb(iocb)) {
@@ -5179,7 +5179,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
* do. We do the check mainly to optimize the common PAGE_SIZE ==
* blocksize case
*/
- if (offset > PAGE_SIZE - (1 << inode->i_blkbits))
+ if (offset > PAGE_SIZE - i_blocksize(inode))
return;
while (1) {
page = find_lock_page(inode->i_mapping,
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 10c62de642c6..354dc1a894c2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -838,7 +838,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
inode = page->mapping->host;
sb = inode->i_sb;
ngroups = ext4_get_groups_count(sb);
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
blocks_per_page = PAGE_SIZE / blocksize;
groups_per_page = blocks_per_page >> 1;
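
These conversions, here and in the move_extent, jfs, iomap, mpage, and nfsd/blocklayout hunks that follow, replace the open-coded 1 << inode->i_blkbits with the i_blocksize() helper from include/linux/fs.h, which is essentially:

static inline unsigned int i_blocksize(const struct inode *node)
{
	return 1 << node->i_blkbits;
}

One helper keeps the shift in a single place instead of scattering it across every filesystem.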
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 6fc14def0c70..578f8c33fb44 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -187,7 +187,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
if (PageUptodate(page))
return 0;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index a3ec3ae7d347..482081bcdf70 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -38,7 +38,7 @@ static int hfs_get_last_session(struct super_block *sb,
/* default values */
*start = 0;
- *size = sb->s_bdev->bd_inode->i_size >> 9;
+ *size = i_size_read(sb->s_bdev->bd_inode) >> 9;
if (HFS_SB(sb)->session >= 0) {
te.cdte_track = HFS_SB(sb)->session;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index ebb85e5f6549..e254fa0f0697 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -132,7 +132,7 @@ static int hfsplus_get_last_session(struct super_block *sb,
/* default values */
*start = 0;
- *size = sb->s_bdev->bd_inode->i_size >> 9;
+ *size = i_size_read(sb->s_bdev->bd_inode) >> 9;
if (HFSPLUS_SB(sb)->session >= 0) {
te.cdte_track = HFSPLUS_SB(sb)->session;
diff --git a/fs/iomap.c b/fs/iomap.c
index d209f42cdcb8..0f85f2410605 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -420,8 +420,8 @@ int
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
const struct iomap_ops *ops)
{
- unsigned blocksize = (1 << inode->i_blkbits);
- unsigned off = pos & (blocksize - 1);
+ unsigned int blocksize = i_blocksize(inode);
+ unsigned int off = pos & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!off)
@@ -735,9 +735,9 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap)
{
struct iomap_dio *dio = data;
- unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
- unsigned fs_block_size = (1 << inode->i_blkbits), pad;
- unsigned align = iov_iter_alignment(dio->submit.iter);
+ unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
+ unsigned int fs_block_size = i_blocksize(inode), pad;
+ unsigned int align = iov_iter_alignment(dio->submit.iter);
struct iov_iter iter;
struct bio *bio;
bool need_zeroout = false;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2be7c9ce6663..c64c2574a0aa 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -758,7 +758,7 @@ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
sb->s_blocksize - offset : toread;
tmp_bh.b_state = 0;
- tmp_bh.b_size = 1 << inode->i_blkbits;
+ tmp_bh.b_size = i_blocksize(inode);
err = jfs_get_block(inode, blk, &tmp_bh, 0);
if (err)
return err;
@@ -798,7 +798,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
- tmp_bh.b_size = 1 << inode->i_blkbits;
+ tmp_bh.b_size = i_blocksize(inode);
err = jfs_get_block(inode, blk, &tmp_bh, 1);
if (err)
goto out;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 439b946c4808..db5900aaa55a 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -478,7 +478,7 @@ static void kernfs_drain(struct kernfs_node *kn)
rwsem_release(&kn->dep_map, 1, _RET_IP_);
}
- kernfs_unmap_bin_file(kn);
+ kernfs_drain_open_files(kn);
mutex_lock(&kernfs_mutex);
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 4f0535890b30..35043a8c4529 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -515,7 +515,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
goto out_put;
rc = 0;
- of->mmapped = 1;
+ of->mmapped = true;
of->vm_ops = vma->vm_ops;
vma->vm_ops = &kernfs_vm_ops;
out_put:
@@ -707,7 +707,8 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
if (error)
goto err_free;
- ((struct seq_file *)file->private_data)->private = of;
+ of->seq_file = file->private_data;
+ of->seq_file->private = of;
/* seq_file clears PWRITE unconditionally, restore it if WRITE */
if (file->f_mode & FMODE_WRITE)
@@ -716,13 +717,22 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
/* make sure we have open node struct */
error = kernfs_get_open_node(kn, of);
if (error)
- goto err_close;
+ goto err_seq_release;
+
+ if (ops->open) {
+ /* nobody has access to @of yet, skip @of->mutex */
+ error = ops->open(of);
+ if (error)
+ goto err_put_node;
+ }
/* open succeeded, put active references */
kernfs_put_active(kn);
return 0;
-err_close:
+err_put_node:
+ kernfs_put_open_node(kn, of);
+err_seq_release:
seq_release(inode, file);
err_free:
kfree(of->prealloc_buf);
@@ -732,11 +742,41 @@ err_out:
return error;
}
+/* used from release/drain to ensure that ->release() is called exactly once */
+static void kernfs_release_file(struct kernfs_node *kn,
+ struct kernfs_open_file *of)
+{
+ /*
+ * @of is guaranteed to have no other file operations in flight and
+ * we just want to synchronize release and drain paths.
+ * @kernfs_open_file_mutex is enough. @of->mutex can't be used
+ * here because drain path may be called from places which can
+ * cause circular dependency.
+ */
+ lockdep_assert_held(&kernfs_open_file_mutex);
+
+ if (!of->released) {
+ /*
+ * A file is never detached without being released and we
+ * need to be able to release files which are deactivated
+ * and being drained. Don't use kernfs_ops().
+ */
+ kn->attr.ops->release(of);
+ of->released = true;
+ }
+}
+
static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
struct kernfs_node *kn = filp->f_path.dentry->d_fsdata;
struct kernfs_open_file *of = kernfs_of(filp);
+ if (kn->flags & KERNFS_HAS_RELEASE) {
+ mutex_lock(&kernfs_open_file_mutex);
+ kernfs_release_file(kn, of);
+ mutex_unlock(&kernfs_open_file_mutex);
+ }
+
kernfs_put_open_node(kn, of);
seq_release(inode, filp);
kfree(of->prealloc_buf);
@@ -745,12 +785,12 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
return 0;
}
-void kernfs_unmap_bin_file(struct kernfs_node *kn)
+void kernfs_drain_open_files(struct kernfs_node *kn)
{
struct kernfs_open_node *on;
struct kernfs_open_file *of;
- if (!(kn->flags & KERNFS_HAS_MMAP))
+ if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
return;
spin_lock_irq(&kernfs_open_node_lock);
@@ -762,10 +802,16 @@ void kernfs_unmap_bin_file(struct kernfs_node *kn)
return;
mutex_lock(&kernfs_open_file_mutex);
+
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);
- unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ if (kn->flags & KERNFS_HAS_MMAP)
+ unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+
+ kernfs_release_file(kn, of);
}
+
mutex_unlock(&kernfs_open_file_mutex);
kernfs_put_open_node(kn, NULL);
@@ -964,6 +1010,8 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
kn->flags |= KERNFS_HAS_SEQ_SHOW;
if (ops->mmap)
kn->flags |= KERNFS_HAS_MMAP;
+ if (ops->release)
+ kn->flags |= KERNFS_HAS_RELEASE;
rc = kernfs_add_one(kn);
if (rc) {
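
The kernfs changes add ->open()/->release() callbacks and must guarantee that ->release() runs exactly once, whether the file is closed normally or torn down by kernfs_drain_open_files(). A minimal sketch of that "release once" pattern; the demo_* names are illustrative.

/* Both the close path and the drain path call this under one mutex;
 * the per-open "released" flag makes the release idempotent. */
#include <linux/mutex.h>
#include <linux/types.h>

struct demo_open_file {
	bool released;
};

static DEFINE_MUTEX(demo_open_file_mutex);

static void demo_release_once(struct demo_open_file *of,
			      void (*release)(struct demo_open_file *of))
{
	lockdep_assert_held(&demo_open_file_mutex);

	if (!of->released) {
		release(of);
		of->released = true;
	}
}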
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index bfd551bbf231..3100987cf8ba 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -104,7 +104,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
*/
extern const struct file_operations kernfs_file_fops;
-void kernfs_unmap_bin_file(struct kernfs_node *kn);
+void kernfs_drain_open_files(struct kernfs_node *kn);
/*
* symlink.c
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1c13dd80744f..7e4ea3b9f472 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -322,6 +322,8 @@ static int lockd_inet6addr_event(struct notifier_block *this,
dprintk("lockd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nlmsvc_rqst->rq_server,
(struct sockaddr *)&sin6);
}
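
Link-local (fe80::/10) addresses are only meaningful together with an interface index, so the temporary-transport aging code has to carry sin6_scope_id as well; the same fix appears again in the fs/nfsd/nfssvc.c hunk further down. A standalone sketch; demo_fill_sin6 and its ifindex parameter are illustrative.

#include <linux/socket.h>
#include <net/ipv6.h>

static void demo_fill_sin6(struct sockaddr_in6 *sin6,
			   const struct in6_addr *addr, int ifindex)
{
	sin6->sin6_family = AF_INET6;
	sin6->sin6_addr = *addr;
	/* without the scope id, a link-local address is ambiguous */
	if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = ifindex;
}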
diff --git a/fs/mpage.c b/fs/mpage.c
index 28af984a3d96..baff8f820c29 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -115,7 +115,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
SetPageUptodate(page);
return;
}
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+ create_empty_buffers(page, i_blocksize(inode), 0);
}
head = page_buffers(page);
page_bh = head;
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c
index f32f272ee501..97b111d79489 100644
--- a/fs/ncpfs/sock.c
+++ b/fs/ncpfs/sock.c
@@ -525,7 +525,7 @@ static int do_tcp_rcv(struct ncp_server *server, void *buffer, size_t len)
return result;
}
if (result > len) {
- pr_err("tcp: bug in recvmsg (%u > %Zu)\n", result, len);
+ pr_err("tcp: bug in recvmsg (%u > %zu)\n", result, len);
return -EIO;
}
return result;
@@ -619,7 +619,7 @@ skipdata:;
goto skipdata2;
}
if (datalen > req->datalen + 8) {
- pr_err("tcp: Unexpected reply len %d (expected at most %Zd)\n", datalen, req->datalen + 8);
+ pr_err("tcp: Unexpected reply len %d (expected at most %zd)\n", datalen, req->datalen + 8);
server->rcv.state = 3;
goto skipdata;
}
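
%Z is a nonstandard glibc length modifier that newer compilers warn about; C99's %zu/%zd is the portable spelling for size_t/ssize_t, which is what this hunk and the nfs/nfsd printk hunks below switch to. A trivial userspace illustration:

#include <stdio.h>

int main(void)
{
	size_t len = sizeof(long);

	printf("len = %zu\n", len);	/* use %zu, not the old %Zu */
	return 0;
}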
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 2905479f214a..0ca370d23ddb 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -381,7 +381,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
struct blk_plug plug;
int i;
- dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
+ dprintk("%s enter, %zu@%lld\n", __func__, count, offset);
/* At this point, header->page_aray is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index eb094c6011d8..fd0284c1dc32 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -1083,7 +1083,8 @@ struct svc_version nfs4_callback_version1 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
- .vs_hidden = 1,
+ .vs_hidden = true,
+ .vs_need_cong_ctrl = true,
};
struct svc_version nfs4_callback_version4 = {
@@ -1092,5 +1093,6 @@ struct svc_version nfs4_callback_version4 = {
.vs_proc = nfs4_callback_procedures1,
.vs_xdrsize = NFS4_CALLBACK_XDRSIZE,
.vs_dispatch = NULL,
- .vs_hidden = 1,
+ .vs_hidden = true,
+ .vs_need_cong_ctrl = true,
};
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index a3fc48ba4931..18f98e08544d 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -482,7 +482,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr)
u32 j, idx;
struct nfs_fh *fh;
- dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
+ dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
hdr->args.pgbase, (size_t)hdr->args.count, offset);
@@ -540,7 +540,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
if (IS_ERR(ds_clnt))
return PNFS_NOT_ATTEMPTED;
- dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
+ dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 0ca4af8cca5d..d6acc688df7e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1751,7 +1751,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
int vers;
struct nfs_fh *fh;
- dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
+ dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
hdr->args.pgbase, (size_t)hdr->args.count, offset);
@@ -1828,7 +1828,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
vers = nfs4_ff_layout_ds_version(lseg, idx);
- dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n",
+ dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count),
vers);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 2a4cdce939a0..8f3d2acb81c3 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -291,7 +291,7 @@ objlayout_read_pagelist(struct nfs_pgio_header *hdr)
&hdr->args.pgbase,
hdr->args.offset, hdr->args.count);
- dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
+ dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n",
__func__, inode->i_ino, offset, count, hdr->res.eof);
err = objio_read_pagelist(hdr);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index a06115e31612..92b4b41d19d2 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -24,7 +24,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
struct super_block *sb = inode->i_sb;
- u32 block_size = (1 << inode->i_blkbits);
+ u32 block_size = i_blocksize(inode);
struct pnfs_block_extent *bex;
struct iomap iomap;
u32 device_generation = 0;
@@ -181,7 +181,7 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
int nr_iomaps;
nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ lcp->lc_up_len, &iomaps, i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
@@ -375,7 +375,7 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
int nr_iomaps;
nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ lcp->lc_up_len, &iomaps, i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 43e109cc0ccc..e71f11b1a180 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1102,6 +1102,7 @@ static struct flags {
{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
{ NFSEXP_V4ROOT, {"v4root", ""}},
{ NFSEXP_PNFS, {"pnfs", ""}},
+ { NFSEXP_SECURITY_LABEL, {"security_label", ""}},
{ 0, {"", ""}}
};
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index d08cd88155c7..838f90f3f890 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -376,5 +376,4 @@ struct svc_version nfsd_acl_version2 = {
.vs_proc = nfsd_acl_procedures2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
- .vs_hidden = 0,
};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 0c890347cde3..dcb5f79076c0 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -266,6 +266,5 @@ struct svc_version nfsd_acl_version3 = {
.vs_proc = nfsd_acl_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
- .vs_hidden = 0,
};
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index d818e4ffd79f..045c9081eabe 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -193,11 +193,9 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nfserr = nfsd_write(rqstp, &resp->fh, NULL,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt,
- &resp->committed);
+ nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
+ rqstp->rq_vec, argp->vlen,
+ &cnt, resp->committed);
resp->count = cnt;
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index eb78109d666c..0274db6e65d0 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -303,6 +303,7 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, length + 4);
if (unlikely(p == NULL))
goto out_overflow;
+ p += XDR_QUADLEN(length);
hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
@@ -396,13 +397,10 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_callback *cb)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
- struct nfs4_sessionid id;
- int status;
+ int status = -ESERVERFAULT;
__be32 *p;
u32 dummy;
- status = -ESERVERFAULT;
-
/*
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
@@ -410,9 +408,8 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
- memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
- if (memcmp(id.data, session->se_sessionid.data,
- NFS4_MAX_SESSIONID_LEN) != 0) {
+
+ if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
dprintk("NFS: %s Invalid session id\n", __func__);
goto out;
}
@@ -753,6 +750,14 @@ int set_callback_cred(void)
return 0;
}
+void cleanup_callback_cred(void)
+{
+ if (callback_cred) {
+ put_rpccred(callback_cred);
+ callback_cred = NULL;
+ }
+}
+
static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
{
if (clp->cl_minorversion == 0) {
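
The one-line p += XDR_QUADLEN(length) fix matters because the reserved buffer starts with the opaque tag: nops sits after the tag, rounded up to 4-byte XDR words, not at the start of the decoded area. A hedged sketch of the decode step; the demo_* names are illustrative, and XDR_QUADLEN(n) is ((n) + 3) >> 2.

#include <linux/errno.h>
#include <linux/sunrpc/xdr.h>
#include <linux/types.h>

static int demo_decode_compound_res(struct xdr_stream *xdr, u32 taglen,
				    unsigned int *nops)
{
	__be32 *p;

	/* reserve the tag plus the 4-byte nops word that follows it */
	p = xdr_inline_decode(xdr, taglen + 4);
	if (!p)
		return -EIO;
	p += XDR_QUADLEN(taglen);	/* skip the tag, in XDR words */
	*nops = be32_to_cpup(p);
	return 0;
}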
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5b20577dcdd2..6b9b6cca469f 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -628,6 +628,10 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
{
__be32 status;
u32 id = -1;
+
+ if (name == NULL || namelen == 0)
+ return nfserr_inval;
+
status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
*uid = make_kuid(&init_user_ns, id);
if (!uid_valid(*uid))
@@ -641,6 +645,10 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
{
__be32 status;
u32 id = -1;
+
+ if (name == NULL || namelen == 0)
+ return nfserr_inval;
+
status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
*gid = make_kgid(&init_user_ns, id);
if (!gid_valid(*gid))
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 74a6e573e061..cbeeda1e94a2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -95,11 +95,15 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u32 *bmval, u32 *writable)
{
struct dentry *dentry = cstate->current_fh.fh_dentry;
+ struct svc_export *exp = cstate->current_fh.fh_export;
if (!nfsd_attrs_supported(cstate->minorversion, bmval))
return nfserr_attrnotsupp;
if ((bmval[0] & FATTR4_WORD0_ACL) && !IS_POSIXACL(d_inode(dentry)))
return nfserr_attrnotsupp;
+ if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) &&
+ !(exp->ex_flags & NFSEXP_SECURITY_LABEL))
+ return nfserr_attrnotsupp;
if (writable && !bmval_is_subset(bmval, writable))
return nfserr_inval;
if (writable && (bmval[2] & FATTR4_WORD2_MODE_UMASK) &&
@@ -983,7 +987,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- &write->wr_how_written);
+ write->wr_how_written);
fput(filp);
write->wr_bytes_written = cnt;
@@ -1838,6 +1842,12 @@ static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd
return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
}
+static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ /* ac_supported, ac_resp_access */
+ return (op_encode_hdr_size + 2)* sizeof(__be32);
+}
+
static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
@@ -1892,6 +1902,11 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
return ret;
}
+static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
+}
+
static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1933,6 +1948,11 @@ static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
XDR_QUADLEN(rlen)) * sizeof(__be32);
}
+static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
+}
+
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1952,11 +1972,23 @@ static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
}
+static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
+ * sizeof(__be32);
+}
+
static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
}
+static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
+ (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
+}
+
static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
@@ -2011,6 +2043,19 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
}
#ifdef CONFIG_NFSD_PNFS
+static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 maxcount = 0, rlen = 0;
+
+ maxcount = svc_max_payload(rqstp);
+ rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
+
+ return (op_encode_hdr_size +
+ 1 /* gd_layout_type*/ +
+ XDR_QUADLEN(rlen) +
+ 2 /* gd_notify_types */) * sizeof(__be32);
+}
+
/*
* At this stage we don't really know what layout driver will handle the request,
* so we need to define an arbitrary upper bound here.
@@ -2040,10 +2085,17 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
}
#endif /* CONFIG_NFSD_PNFS */
+
+static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 3) * sizeof(__be32);
+}
+
static struct nfsd4_operation nfsd4_ops[] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
.op_name = "OP_ACCESS",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_access_rsize,
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
@@ -2081,6 +2133,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_GETFH] = {
.op_func = (nfsd4op_func)nfsd4_getfh,
.op_name = "OP_GETFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_getfh_rsize,
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
@@ -2099,6 +2152,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
.op_name = "OP_LOCKT",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
@@ -2111,15 +2165,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUP",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
.op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID,
.op_name = "OP_LOOKUPP",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_NVERIFY] = {
.op_func = (nfsd4op_func)nfsd4_nverify,
.op_name = "OP_NVERIFY",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
@@ -2177,6 +2234,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
.op_name = "OP_READLINK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_readlink_rsize,
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
@@ -2215,6 +2273,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
},
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
@@ -2240,6 +2299,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
.op_name = "OP_VERIFY",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
@@ -2314,11 +2374,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_secinfo_rsize,
},
[OP_TEST_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_test_stateid,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_TEST_STATEID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_test_stateid_rsize,
},
[OP_FREE_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_free_stateid,
@@ -2332,6 +2394,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_GETDEVICEINFO",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_getdeviceinfo_rsize,
},
[OP_LAYOUTGET] = {
.op_func = (nfsd4op_func)nfsd4_layoutget,
@@ -2381,6 +2444,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_seek_rsize,
},
};
@@ -2425,14 +2489,11 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
- struct nfsd4_operation *opdesc;
- nfsd4op_rsize estimator;
-
if (op->opnum == OP_ILLEGAL)
return op_encode_hdr_size * sizeof(__be32);
- opdesc = OPDESC(op);
- estimator = opdesc->op_rsize_bop;
- return estimator ? estimator(rqstp, op) : PAGE_SIZE;
+
+ BUG_ON(OPDESC(op)->op_rsize_bop == NULL);
+ return OPDESC(op)->op_rsize_bop(rqstp, op);
}
void warn_on_nonidempotent_op(struct nfsd4_op *op)
@@ -2476,12 +2537,13 @@ static struct svc_procedure nfsd_procedures4[2] = {
};
struct svc_version nfsd_version4 = {
- .vs_vers = 4,
- .vs_nproc = 2,
- .vs_proc = nfsd_procedures4,
- .vs_dispatch = nfsd_dispatch,
- .vs_xdrsize = NFS4_SVC_XDRSIZE,
- .vs_rpcb_optnl = 1,
+ .vs_vers = 4,
+ .vs_nproc = 2,
+ .vs_proc = nfsd_procedures4,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS4_SVC_XDRSIZE,
+ .vs_rpcb_optnl = true,
+ .vs_need_cong_ctrl = true,
};
/*
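
With nfsd4_max_reply() now trusting op_rsize_bop unconditionally (BUG_ON when it is missing), every operation needs an estimator, which is why so many .op_rsize_bop entries are added above. The estimators are simple XDR word counts; assuming op_encode_hdr_size is the usual two words (opcode + status), the new ACCESS estimator works out to (2 + 2) * 4 = 16 bytes. A worked restatement, with an illustrative DEMO_* name:

#include <linux/types.h>

#define DEMO_OP_ENCODE_HDR_SIZE 2	/* opcode + status, in XDR words */

static inline u32 demo_access_rsize(void)
{
	/* header + ac_supported + ac_resp_access = 4 words = 16 bytes */
	return (DEMO_OP_ENCODE_HDR_SIZE + 2) * sizeof(__be32);
}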
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a0dee8ae9f97..e9ef50addddb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2281,7 +2281,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
out_err:
conn->cb_addr.ss_family = AF_UNSPEC;
conn->cb_addrlen = 0;
- dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+ dprintk("NFSD: this client (clientid %08x/%08x) "
"will not receive delegations\n",
clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -7012,23 +7012,24 @@ nfs4_state_start(void)
ret = set_callback_cred();
if (ret)
- return -ENOMEM;
+ return ret;
+
laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
if (laundry_wq == NULL) {
ret = -ENOMEM;
- goto out_recovery;
+ goto out_cleanup_cred;
}
ret = nfsd4_create_callback_queue();
if (ret)
goto out_free_laundry;
set_max_delegations();
-
return 0;
out_free_laundry:
destroy_workqueue(laundry_wq);
-out_recovery:
+out_cleanup_cred:
+ cleanup_callback_cred();
return ret;
}
@@ -7086,6 +7087,7 @@ nfs4_state_shutdown(void)
{
destroy_workqueue(laundry_wq);
nfsd4_destroy_callback_queue();
+ cleanup_callback_cred();
}
static void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 8fae53ce21d1..382c1fd05b4c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -58,7 +58,7 @@
#define NFSDDBG_FACILITY NFSDDBG_XDR
-u32 nfsd_suppattrs[3][3] = {
+const u32 nfsd_suppattrs[3][3] = {
{NFSD4_SUPPORTED_ATTRS_WORD0,
NFSD4_SUPPORTED_ATTRS_WORD1,
NFSD4_SUPPORTED_ATTRS_WORD2},
@@ -1250,7 +1250,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
READ_BUF(16);
p = xdr_decode_hyper(p, &write->wr_offset);
write->wr_stable_how = be32_to_cpup(p++);
- if (write->wr_stable_how > 2)
+ if (write->wr_stable_how > NFS_FILE_SYNC)
goto xdr_error;
write->wr_buflen = be32_to_cpup(p++);
@@ -1941,12 +1941,12 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
} else
max_reply += nfsd4_max_reply(argp->rqstp, op);
/*
- * OP_LOCK may return a conflicting lock. (Special case
- * because it will just skip encoding this if it runs
- * out of xdr buffer space, and it is the only operation
- * that behaves this way.)
+ * OP_LOCK and OP_LOCKT may return a conflicting lock.
+ * (Special case because it will just skip encoding this
+ * if it runs out of xdr buffer space, and it is the only
+ * operation that behaves this way.)
*/
- if (op->opnum == OP_LOCK)
+ if (op->opnum == OP_LOCK || op->opnum == OP_LOCKT)
max_reply += NFS4_OPAQUE_LIMIT;
if (op->status) {
@@ -1966,9 +1966,13 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
DECODE_TAIL;
}
-static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
+ struct svc_export *exp)
{
- if (IS_I_VERSION(inode)) {
+ if (exp->ex_flags & NFSEXP_V4ROOT) {
+ *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
+ *p++ = 0;
+ } else if (IS_I_VERSION(inode)) {
p = xdr_encode_hyper(p, inode->i_version);
} else {
*p++ = cpu_to_be32(stat->ctime.tv_sec);
@@ -2417,8 +2421,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
- err = security_inode_getsecctx(d_inode(dentry),
+ if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
+ err = security_inode_getsecctx(d_inode(dentry),
&context, &contextlen);
+ else
+ err = -EOPNOTSUPP;
contextsupport = (err == 0);
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
@@ -2490,7 +2497,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p = xdr_reserve_space(xdr, 8);
if (!p)
goto out_resource;
- p = encode_change(p, &stat, d_inode(dentry));
+ p = encode_change(p, &stat, d_inode(dentry), exp);
}
if (bmval0 & FATTR4_WORD0_SIZE) {
p = xdr_reserve_space(xdr, 8);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index d6b97b424ad1..96fd15979cbd 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -578,7 +578,7 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
struct kvec *vec = &rqstp->rq_res.head[0];
if (vec->iov_len + data->iov_len > PAGE_SIZE) {
- printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n",
+ printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
data->iov_len);
return 0;
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f3b2f34b10a3..73e75ac90525 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -536,6 +536,19 @@ out_free:
return rv;
}
+static ssize_t
+nfsd_print_version_support(char *buf, int remaining, const char *sep,
+ unsigned vers, unsigned minor)
+{
+ const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";
+ bool supported = !!nfsd_vers(vers, NFSD_TEST);
+
+ if (vers == 4 && !nfsd_minorversion(minor, NFSD_TEST))
+ supported = false;
+ return snprintf(buf, remaining, format, sep,
+ supported ? '+' : '-', vers, minor);
+}
+
static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
@@ -561,6 +574,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
len = qword_get(&mesg, vers, size);
if (len <= 0) return -EINVAL;
do {
+ enum vers_op cmd;
sign = *vers;
if (sign == '+' || sign == '-')
num = simple_strtol((vers+1), &minorp, 0);
@@ -569,24 +583,22 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (*minorp == '.') {
if (num != 4)
return -EINVAL;
- minor = simple_strtoul(minorp+1, NULL, 0);
- if (minor == 0)
- return -EINVAL;
- if (nfsd_minorversion(minor, sign == '-' ?
- NFSD_CLEAR : NFSD_SET) < 0)
+ if (kstrtouint(minorp+1, 0, &minor) < 0)
return -EINVAL;
- goto next;
- }
+ } else
+ minor = 0;
+ cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
switch(num) {
case 2:
case 3:
- case 4:
- nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
+ nfsd_vers(num, cmd);
break;
+ case 4:
+ if (nfsd_minorversion(minor, cmd) >= 0)
+ break;
default:
return -EINVAL;
}
- next:
vers += len + 1;
} while ((len = qword_get(&mesg, vers, size)) > 0);
/* If all get turned off, turn them back on, as
@@ -599,35 +611,23 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
len = 0;
sep = "";
remaining = SIMPLE_TRANSACTION_LIMIT;
- for (num=2 ; num <= 4 ; num++)
- if (nfsd_vers(num, NFSD_AVAIL)) {
- len = snprintf(buf, remaining, "%s%c%d", sep,
- nfsd_vers(num, NFSD_TEST)?'+':'-',
- num);
- sep = " ";
-
- if (len >= remaining)
- break;
- remaining -= len;
- buf += len;
- tlen += len;
- }
- if (nfsd_vers(4, NFSD_AVAIL))
- for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION;
- minor++) {
- len = snprintf(buf, remaining, " %c4.%u",
- (nfsd_vers(4, NFSD_TEST) &&
- nfsd_minorversion(minor, NFSD_TEST)) ?
- '+' : '-',
- minor);
-
+ for (num=2 ; num <= 4 ; num++) {
+ if (!nfsd_vers(num, NFSD_AVAIL))
+ continue;
+ minor = 0;
+ do {
+ len = nfsd_print_version_support(buf, remaining,
+ sep, num, minor);
if (len >= remaining)
- break;
+ goto out;
remaining -= len;
buf += len;
tlen += len;
- }
-
+ minor++;
+ sep = " ";
+ } while (num == 4 && minor <= NFSD_SUPPORTED_MINOR_VERSION);
+ }
+out:
len = snprintf(buf, remaining, "\n");
if (len >= remaining)
return -EINVAL;
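
nfsd_print_version_support() folds the old major- and minor-version printing paths into one format choice: minor 0 prints a "+4" style token, nonzero minors print "+4.1" style, so a read of the versions file comes back as something like "-2 +3 +4 +4.1 +4.2" (the flags here are only an example). A userspace restatement of the helper, with a plain supported flag standing in for the nfsd_vers()/nfsd_minorversion() checks:

#include <stdbool.h>
#include <stdio.h>

static int demo_print_version(char *buf, int remaining, const char *sep,
			      unsigned int vers, unsigned int minor,
			      bool supported)
{
	const char *format = (minor == 0) ? "%s%c%u" : "%s%c%u.%u";

	/* the trailing "minor" argument is simply unused when minor == 0 */
	return snprintf(buf, remaining, format,
			sep, supported ? '+' : '-', vers, minor);
}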
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d74c8c44dc35..d96606801d47 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -362,16 +362,16 @@ void nfsd_lockd_shutdown(void);
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS)
-extern u32 nfsd_suppattrs[3][3];
+extern const u32 nfsd_suppattrs[3][3];
-static inline bool bmval_is_subset(u32 *bm1, u32 *bm2)
+static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
{
return !((bm1[0] & ~bm2[0]) ||
(bm1[1] & ~bm2[1]) ||
(bm1[2] & ~bm2[2]));
}
-static inline bool nfsd_attrs_supported(u32 minorversion, u32 *bmval)
+static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
{
return bmval_is_subset(bmval, nfsd_suppattrs[minorversion]);
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 010aff5c5a79..fa82b7707e85 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -204,18 +204,14 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
struct nfsd_attrstat *resp)
{
__be32 nfserr;
- int stable = 1;
unsigned long cnt = argp->len;
dprintk("nfsd: WRITE %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &cnt,
- &stable);
+ nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset,
+ rqstp->rq_vec, argp->vlen, &cnt, NFS_DATA_SYNC);
return nfsd_return_attrs(nfserr, resp);
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e6bfd96734c0..efd66da99201 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -153,6 +153,18 @@ int nfsd_vers(int vers, enum vers_op change)
return 0;
}
+static void
+nfsd_adjust_nfsd_versions4(void)
+{
+ unsigned i;
+
+ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
+ if (nfsd_supported_minorversions[i])
+ return;
+ }
+ nfsd_vers(4, NFSD_CLEAR);
+}
+
int nfsd_minorversion(u32 minorversion, enum vers_op change)
{
if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
@@ -160,9 +172,11 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
switch(change) {
case NFSD_SET:
nfsd_supported_minorversions[minorversion] = true;
+ nfsd_vers(4, NFSD_SET);
break;
case NFSD_CLEAR:
nfsd_supported_minorversions[minorversion] = false;
+ nfsd_adjust_nfsd_versions4();
break;
case NFSD_TEST:
return nfsd_supported_minorversions[minorversion];
@@ -354,6 +368,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ifa->addr;
+ if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4516e8b7d776..005c911b34ac 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -615,6 +615,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
extern int set_callback_cred(void);
+extern void cleanup_callback_cred(void);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 26c6fdb4bf67..19d50f600e8d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -377,7 +377,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
__be32 err;
int host_err;
bool get_write_count;
- int size_change = 0;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -390,11 +390,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
if (err)
- goto out;
+ return err;
if (get_write_count) {
host_err = fh_want_write(fhp);
if (host_err)
- return nfserrno(host_err);
+ goto out;
}
dentry = fhp->fh_dentry;
@@ -405,20 +405,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~ATTR_MODE;
if (!iap->ia_valid)
- goto out;
+ return 0;
nfsd_sanitize_attrs(inode, iap);
+ if (check_guard && guardtime != inode->i_ctime.tv_sec)
+ return nfserr_notsync;
+
/*
* The size case is special, it changes the file in addition to the
- * attributes.
+ * attributes, and file systems don't expect it to be mixed with
+ * "random" attribute changes. We thus split out the size change
+ * into a separate call to ->setattr, and do the rest as a separate
+ * setattr call.
*/
- if (iap->ia_valid & ATTR_SIZE) {
+ if (size_change) {
err = nfsd_get_write_access(rqstp, fhp, iap);
if (err)
- goto out;
- size_change = 1;
+ return err;
+ }
+ fh_lock(fhp);
+ if (size_change) {
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
@@ -426,29 +434,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*
* (and similar for the older RFCs)
*/
- if (iap->ia_size != i_size_read(inode))
- iap->ia_valid |= ATTR_MTIME;
- }
+ struct iattr size_attr = {
+ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+ .ia_size = iap->ia_size,
+ };
- iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(dentry, &size_attr, NULL);
+ if (host_err)
+ goto out_unlock;
+ iap->ia_valid &= ~ATTR_SIZE;
- if (check_guard && guardtime != inode->i_ctime.tv_sec) {
- err = nfserr_notsync;
- goto out_put_write_access;
+ /*
+ * Avoid the additional setattr call below if the only other
+ * attribute that the client sends is the mtime, as we update
+ * it as part of the size change above.
+ */
+ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+ goto out_unlock;
}
- fh_lock(fhp);
+ iap->ia_valid |= ATTR_CTIME;
host_err = notify_change(dentry, iap, NULL);
- fh_unlock(fhp);
- err = nfserrno(host_err);
-out_put_write_access:
+out_unlock:
+ fh_unlock(fhp);
if (size_change)
put_write_access(inode);
- if (!err)
- err = nfserrno(commit_metadata(fhp));
out:
- return err;
+ if (!host_err)
+ host_err = commit_metadata(fhp);
+ return nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
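
The comment in the hunk above explains why the size change is peeled off into its own notify_change() call before the remaining attributes are applied. The stand-alone sketch below mirrors only the attribute-mask bookkeeping; the ATTR_* constants and apply_attrs() are illustrative stand-ins, not kernel code:

/* Sketch of the mask juggling in the setattr split above. */
#include <stdio.h>

#define ATTR_MODE  0x01
#define ATTR_SIZE  0x08
#define ATTR_MTIME 0x20
#define ATTR_CTIME 0x40

static void apply_attrs(unsigned int valid)
{
	printf("notify_change with mask 0x%x\n", valid);
}

static void setattr_split(unsigned int valid)
{
	if (valid & ATTR_SIZE) {
		/* size goes first, with the timestamps it implies */
		apply_attrs(ATTR_SIZE | ATTR_CTIME | ATTR_MTIME);
		valid &= ~ATTR_SIZE;
		/* if only mtime is left, the size change covered it already */
		if ((valid & ~ATTR_MTIME) == 0)
			return;
	}
	apply_attrs(valid | ATTR_CTIME);
}

int main(void)
{
	setattr_split(ATTR_SIZE | ATTR_MTIME);	/* one notify_change */
	setattr_split(ATTR_SIZE | ATTR_MODE);	/* two notify_change calls */
	return 0;
}
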
@@ -940,14 +955,12 @@ static int wait_for_concurrent_writes(struct file *file)
__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int *stablep)
+ unsigned long *cnt, int stable)
{
struct svc_export *exp;
- struct inode *inode;
mm_segment_t oldfs;
__be32 err = 0;
int host_err;
- int stable = *stablep;
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
@@ -962,13 +975,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
*/
current->flags |= PF_LESS_THROTTLE;
- inode = file_inode(file);
- exp = fhp->fh_export;
-
+ exp = fhp->fh_export;
use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!EX_ISSYNC(exp))
- stable = 0;
+ stable = NFS_UNSTABLE;
if (stable && !use_wgather)
flags |= RWF_SYNC;
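
In the hunk above, a stable write (on an export that is not already synchronous, with write gathering off) is mapped onto the per-write RWF_SYNC flag. The same flag is reachable from user space through pwritev2(); the example below is a rough illustration and assumes a glibc new enough to provide pwritev2 and the RWF_* flags (2.26 or later) plus a writable file named data.bin:

/* Minimal pwritev2() example of the per-write RWF_SYNC flag that the
 * server code maps "stable" writes onto. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("data.bin", O_WRONLY | O_CREAT, 0644);
	char buf[] = "stable write\n";
	struct iovec iov = { .iov_base = buf, .iov_len = strlen(buf) };
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* RWF_SYNC: data and metadata are durable when the call returns */
	n = pwritev2(fd, &iov, 1, 0, RWF_SYNC);
	if (n < 0)
		perror("pwritev2");
	close(fd);
	return n < 0;
}
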
@@ -1035,35 +1046,22 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
* N.B. After this call fhp needs an fh_put
*/
__be32
-nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep)
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
+ struct kvec *vec, int vlen, unsigned long *cnt, int stable)
{
- __be32 err = 0;
+ struct file *file = NULL;
+ __be32 err = 0;
trace_write_start(rqstp, fhp, offset, vlen);
- if (file) {
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
- stablep);
- trace_write_io_done(rqstp, fhp, offset, vlen);
- } else {
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
- if (err)
- goto out;
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ if (err)
+ goto out;
- trace_write_opened(rqstp, fhp, offset, vlen);
- if (cnt)
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
- cnt, stablep);
- trace_write_io_done(rqstp, fhp, offset, vlen);
- fput(file);
- }
+ trace_write_opened(rqstp, fhp, offset, vlen);
+ err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
+ trace_write_io_done(rqstp, fhp, offset, vlen);
+ fput(file);
out:
trace_write_done(rqstp, fhp, offset, vlen);
return err;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 0bf9e7bf5800..db98c48c735a 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -83,12 +83,12 @@ __be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
- loff_t, struct kvec *,int, unsigned long *, int *);
+__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
+ struct kvec *, int, unsigned long *, int);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
- int *stablep);
+ int stable);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 2c90e285d7c6..03b8ba933eb2 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -34,7 +34,7 @@
static inline unsigned long
nilfs_palloc_groups_per_desc_block(const struct inode *inode)
{
- return (1UL << inode->i_blkbits) /
+ return i_blocksize(inode) /
sizeof(struct nilfs_palloc_group_desc);
}
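
This and the following hunks (nilfs2, ocfs2, orangefs, reiserfs, stat, udf, xfs) are part of a tree-wide conversion from the open-coded "1 << inode->i_blkbits" to the i_blocksize() helper. The substitution is mechanical and computes the same value; for context, the helper in include/linux/fs.h is approximately:

/* Approximate definition of the helper being substituted throughout;
 * shown here for context only. */
static inline unsigned int i_blocksize(const struct inode *node)
{
	return (1 << node->i_blkbits);
}
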
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index d5c23da43513..c21e0b4454a6 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -50,7 +50,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
brelse(bh);
BUG();
}
- memset(bh->b_data, 0, 1 << inode->i_blkbits);
+ memset(bh->b_data, 0, i_blocksize(inode));
bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = blocknr;
set_buffer_mapped(bh);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 2e315f9f2e51..06ffa135dfa6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -119,7 +119,7 @@ nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
{
- return 1 << btree->b_inode->i_blkbits;
+ return i_blocksize(btree->b_inode);
}
static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
@@ -1870,7 +1870,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
di = &dreq;
ni = NULL;
} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
- 1 << btree->b_inode->i_blkbits)) {
+ nilfs_btree_node_size(btree))) {
di = &dreq;
ni = &nreq;
} else {
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c7f4fef9ebf5..7ffe71a8dfb9 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -51,7 +51,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
- inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
+ inode_add_bytes(inode, i_blocksize(inode) * n);
if (root)
atomic64_add(n, &root->blocks_count);
}
@@ -60,7 +60,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
- inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
+ inode_sub_bytes(inode, i_blocksize(inode) * n);
if (root)
atomic64_sub(n, &root->blocks_count);
}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index d56d3a5bea88..98835ed6bef4 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -57,7 +57,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
set_buffer_mapped(bh);
kaddr = kmap_atomic(bh->b_page);
- memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
+ memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
if (init_block)
init_block(inode, bh, kaddr);
flush_dcache_page(bh->b_page);
@@ -501,7 +501,7 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned int entry_size,
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
mi->mi_entry_size = entry_size;
- mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size;
+ mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bedcae2c28e6..7d18d62e8e07 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -723,7 +723,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
lock_page(page);
if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+ create_empty_buffers(page, i_blocksize(inode), 0);
unlock_page(page);
bh = head = page_buffers(page);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 11556b7d93ec..88a31e9340a0 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -608,7 +608,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
int ret = 0;
struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
unsigned int block_end, block_start;
- unsigned int bsize = 1 << inode->i_blkbits;
+ unsigned int bsize = i_blocksize(inode);
if (!page_has_buffers(page))
create_empty_buffers(page, bsize, 0);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 7025d8c27999..3e04279446e8 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2924,7 +2924,7 @@ again:
/*
* if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
* another try; otherwise, we are sure the MIGRATING state is there,
- * drop the unneded state which blocked threads trying to DIRTY
+ * drop the unneeded state which blocked threads trying to DIRTY
*/
spin_lock(&res->spinlock);
BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7b6a146327d7..8836305eb378 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -808,7 +808,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
/* We know that zero_from is block aligned */
for (block_start = zero_from; block_start < zero_to;
block_start = block_end) {
- block_end = block_start + (1 << inode->i_blkbits);
+ block_end = block_start + i_blocksize(inode);
/*
* block_start is block-aligned. Bump it by one to force
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index 06af81f71e10..9b96b99539d6 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -306,7 +306,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
break;
case S_IFDIR:
inode->i_size = PAGE_SIZE;
- orangefs_inode->blksize = (1 << inode->i_blkbits);
+ orangefs_inode->blksize = i_blocksize(inode);
spin_lock(&inode->i_lock);
inode_set_bytes(inode, inode->i_size);
spin_unlock(&inode->i_lock);
@@ -316,7 +316,7 @@ int orangefs_inode_getattr(struct inode *inode, int new, int bypass)
if (new) {
inode->i_size = (loff_t)strlen(new_op->
downcall.resp.getattr.link_target);
- orangefs_inode->blksize = (1 << inode->i_blkbits);
+ orangefs_inode->blksize = i_blocksize(inode);
ret = strscpy(orangefs_inode->link_target,
new_op->downcall.resp.getattr.link_target,
ORANGEFS_NAME_MAX);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b8f06273353e..1e1e182d571b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -766,7 +766,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
if (!IS_ERR_OR_NULL(mm)) {
/* ensure this mm_struct can't be freed */
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
/* but do not pin its memory */
mmput(mm);
}
@@ -813,7 +813,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
return -ENOMEM;
copied = 0;
- if (!atomic_inc_not_zero(&mm->mm_users))
+ if (!mmget_not_zero(mm))
goto free;
/* Maybe we should limit FOLL_FORCE to actual ptrace users? */
@@ -921,7 +921,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
return -ENOMEM;
ret = 0;
- if (!atomic_inc_not_zero(&mm->mm_users))
+ if (!mmget_not_zero(mm))
goto free;
down_read(&mm->mmap_sem);
@@ -1064,7 +1064,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
if (p) {
if (atomic_read(&p->mm->mm_users) > 1) {
mm = p->mm;
- atomic_inc(&mm->mm_count);
+ mmgrab(mm);
}
task_unlock(p);
}
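
The proc changes above replace open-coded reference counting on mm_struct with the then-new helpers: mmgrab() pins the mm_struct itself (mm_count), while mmget_not_zero() takes a reference on the address space (mm_users) only if it is still live. For context only, the helpers are approximately these thin wrappers (see include/linux/sched/mm.h):

/* Approximate definitions of the helpers used above; kernel-internal,
 * shown only to make the substitutions readable. */
static inline void mmgrab(struct mm_struct *mm)
{
	atomic_inc(&mm->mm_count);		/* pin the mm_struct itself */
}

static inline bool mmget_not_zero(struct mm_struct *mm)
{
	return atomic_inc_not_zero(&mm->mm_users); /* pin the address space, if still alive */
}
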
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0b80ad87b4d6..ea9f3d1ae830 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -373,7 +373,10 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
phdr->p_vaddr = (size_t)m->addr;
- phdr->p_paddr = 0;
+ if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
phdr->p_filesz = phdr->p_memsz = m->size;
phdr->p_align = PAGE_SIZE;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8f96a49178d0..ee3efb229ef6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -167,7 +167,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
return NULL;
down_read(&mm->mmap_sem);
@@ -1352,7 +1352,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
unsigned long end_vaddr;
int ret = 0, copied = 0;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
goto out;
ret = -EINVAL;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 37175621e890..1ef97cfcf422 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -219,7 +219,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-ESRCH);
mm = priv->mm;
- if (!mm || !atomic_inc_not_zero(&mm->mm_users))
+ if (!mm || !mmget_not_zero(mm))
return NULL;
down_read(&mm->mmap_sem);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2f8c5c9bdaf6..b396eb09f288 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -189,7 +189,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
int ret = 0;
th.t_trans_id = 0;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
if (logit) {
reiserfs_write_lock(s);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index cfeae9b0a2b7..a6ab9d64ea1b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -525,7 +525,7 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
* referenced in convert_tail_for_hole() that may be called from
* reiserfs_get_block()
*/
- bh_result->b_size = (1 << inode->i_blkbits);
+ bh_result->b_size = i_blocksize(inode);
ret = reiserfs_get_block(inode, iblock, bh_result,
create | GET_BLOCK_NO_DANGLE);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e314cb30a181..feabcde0290d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1166,7 +1166,7 @@ static int reiserfs_parse_options(struct super_block *s,
if (!strcmp(arg, "auto")) {
/* From JFS code, to auto-get the size. */
*blocks =
- s->s_bdev->bd_inode->i_size >> s->
+ i_size_read(s->s_bdev->bd_inode) >> s->
s_blocksize_bits;
} else {
*blocks = simple_strtoul(arg, &p, 0);
diff --git a/fs/stat.c b/fs/stat.c
index a268b7f27adf..3f14d1ef0868 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -31,7 +31,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
stat->atime = inode->i_atime;
stat->mtime = inode->i_mtime;
stat->ctime = inode->i_ctime;
- stat->blksize = (1 << inode->i_blkbits);
+ stat->blksize = i_blocksize(inode);
stat->blocks = inode->i_blocks;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8ec6b3df0bc7..a8d8f71ef8bd 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1193,7 +1193,7 @@ int udf_setsize(struct inode *inode, loff_t newsize)
{
int err;
struct udf_inode_info *iinfo;
- int bsize = 1 << inode->i_blkbits;
+ int bsize = i_blocksize(inode);
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 625b7285a37b..3c421d06a18e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1807,17 +1807,17 @@ static void init_once_userfaultfd_ctx(void *mem)
}
/**
- * userfaultfd_file_create - Creates an userfaultfd file pointer.
+ * userfaultfd_file_create - Creates a userfaultfd file pointer.
* @flags: Flags for the userfaultfd file.
*
- * This function creates an userfaultfd file pointer, w/out installing
+ * This function creates a userfaultfd file pointer, w/out installing
* it into the fd table. This is useful when the userfaultfd file is
* used during the initialization of data structures that require
* extra setup after the userfaultfd creation. So the userfaultfd
* creation is split into the file pointer creation phase, and the
* file descriptor installation phase. In this way races with
* userspace closing the newly installed file descriptor can be
- * avoided. Returns an userfaultfd file pointer, or a proper error
+ * avoided. Returns a userfaultfd file pointer, or a proper error
* pointer.
*/
static struct file *userfaultfd_file_create(int flags)
@@ -1847,7 +1847,7 @@ static struct file *userfaultfd_file_create(int flags)
ctx->released = false;
ctx->mm = current->mm;
/* prevent the mm struct to be freed */
- atomic_inc(&ctx->mm->mm_count);
+ mmgrab(ctx->mm);
file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS));
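
The docstring above describes splitting file-pointer creation from file-descriptor installation so that userspace cannot close the descriptor while setup is still in progress. In kernel code that pattern generally looks like the sketch below; it is kernel-style pseudocode rather than a standalone program, and my_fops and my_ctx are hypothetical placeholders:

/* Kernel-style sketch of the two-phase create-then-install pattern the
 * comment describes; my_fops and my_ctx are hypothetical. */
static int example_create_fd(void)
{
	struct file *file;
	int fd;

	file = anon_inode_getfile("[example]", &my_fops, my_ctx, O_RDWR);
	if (IS_ERR(file))
		return PTR_ERR(file);

	/* finish any setup that must complete before userspace can act */

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		fput(file);
		return fd;
	}
	fd_install(fd, file);	/* from here on userspace may see and close it */
	return fd;
}
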
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1ff9df7a3ce8..bf65a9ea8642 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -103,9 +103,9 @@ xfs_finish_page_writeback(
unsigned int bsize;
ASSERT(bvec->bv_offset < PAGE_SIZE);
- ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
+ ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
ASSERT(end < PAGE_SIZE);
- ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);
+ ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
bh = head = page_buffers(bvec->bv_page);
@@ -349,7 +349,7 @@ xfs_map_blocks(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- ssize_t count = 1 << inode->i_blkbits;
+ ssize_t count = i_blocksize(inode);
xfs_fileoff_t offset_fsb, end_fsb;
int error = 0;
int bmapi_flags = XFS_BMAPI_ENTIRE;
@@ -758,7 +758,7 @@ xfs_aops_discard_page(
break;
}
next_buffer:
- offset += 1 << inode->i_blkbits;
+ offset += i_blocksize(inode);
} while ((bh = bh->b_this_page) != head);
@@ -846,7 +846,7 @@ xfs_writepage_map(
LIST_HEAD(submit_list);
struct xfs_ioend *ioend, *next;
struct buffer_head *bh, *head;
- ssize_t len = 1 << inode->i_blkbits;
+ ssize_t len = i_blocksize(inode);
int error = 0;
int count = 0;
int uptodate = 1;
@@ -1210,7 +1210,7 @@ xfs_map_trim_size(
offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
- 1 << inode->i_blkbits);
+ i_blocksize(inode));
}
if (mapping_size > LONG_MAX)
mapping_size = LONG_MAX;
@@ -1241,7 +1241,7 @@ xfs_get_blocks(
return -EIO;
offset = (xfs_off_t)iblock << inode->i_blkbits;
- ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+ ASSERT(bh_result->b_size >= i_blocksize(inode));
size = bh_result->b_size;
if (offset >= i_size_read(inode))
@@ -1389,7 +1389,7 @@ xfs_vm_set_page_dirty(
if (offset < end_offset)
set_buffer_dirty(bh);
bh = bh->b_this_page;
- offset += 1 << inode->i_blkbits;
+ offset += i_blocksize(inode);
} while (bh != head);
}
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a50eca676670..35703a801372 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -754,7 +754,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
@@ -776,7 +776,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
new_size = i_size_read(inode) + len;
if (offset & blksize_mask || len & blksize_mask) {
OpenPOWER on IntegriCloud