diff options
Diffstat (limited to 'fs')
47 files changed, 543 insertions, 433 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 69ec23daa25e..a1e6860b6f46 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -574,7 +574,7 @@ static int load_flat_file(struct linux_binprm *bprm, MAX_SHARED_LIBS * sizeof(unsigned long), FLAT_DATA_ALIGN); - pr_debug("Allocated data+bss+stack (%ld bytes): %lx\n", + pr_debug("Allocated data+bss+stack (%u bytes): %lx\n", data_len + bss_len + stack_len, datapos); fpos = ntohl(hdr->data_start); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 375f8c728d91..e3b0b4196d3d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4825,10 +4825,6 @@ skip_async: else flush = BTRFS_RESERVE_NO_FLUSH; spin_lock(&space_info->lock); - if (can_overcommit(fs_info, space_info, orig, flush, false)) { - spin_unlock(&space_info->lock); - break; - } if (list_empty(&space_info->tickets) && list_empty(&space_info->priority_tickets)) { spin_unlock(&space_info->lock); @@ -7589,6 +7585,10 @@ search: u64 offset; int cached; + /* If the block group is read-only, we can skip it entirely. */ + if (unlikely(block_group->ro)) + continue; + btrfs_grab_block_group(block_group, delalloc); search_start = block_group->key.objectid; @@ -7624,8 +7624,6 @@ have_block_group: if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) goto loop; - if (unlikely(block_group->ro)) - goto loop; /* * Ok we want to try and use the cluster allocator, so @@ -7839,6 +7837,7 @@ loop: failed_alloc = false; BUG_ON(index != get_block_group_index(block_group)); btrfs_release_block_group(block_group, delalloc); + cond_resched(); } up_read(&space_info->groups_sem); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f20ef211a73d..3a11ae63676e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2153,8 +2153,7 @@ process_leaf: u32 this_len = sizeof(*di) + name_len + data_len; char *name; - ret = verify_dir_item(fs_info, path->nodes[0], - path->slots[0], di); + ret = verify_dir_item(fs_info, path->nodes[0], i, di); if (ret) { ret = -EIO; goto out; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5eb7217738ed..e8b9a269fdde 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2702,7 +2702,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->chunk_mutex); old_total = btrfs_super_total_bytes(super_copy); - diff = new_size - device->total_bytes; + diff = round_down(new_size - device->total_bytes, fs_info->sectorsize); if (new_size <= device->total_bytes || device->is_tgtdev_for_dev_replace) { @@ -4406,7 +4406,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 diff; new_size = round_down(new_size, fs_info->sectorsize); - diff = old_size - new_size; + diff = round_down(old_size - new_size, fs_info->sectorsize); if (device->is_tgtdev_for_dev_replace) return -EINVAL; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e071d23f6148..ef7240ace576 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -271,6 +271,11 @@ out: if (ret < 0) err = ret; dput(last); + /* last_name no longer match cache index */ + if (fi->readdir_cache_idx >= 0) { + fi->readdir_cache_idx = -1; + fi->dir_release_count = 0; + } } return err; } diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 79dafa71effd..51f0aea70cb4 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -175,11 +175,8 @@ ext2_get_acl(struct inode *inode, int type) return acl; } -/* - * inode->i_mutex: down - */ -int -ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +static int +__ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int name_index; void *value = NULL; @@ -189,13 +186,6 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) switch(type) { case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - inode->i_ctime = current_time(inode); - mark_inode_dirty(inode); - } break; case ACL_TYPE_DEFAULT: @@ -222,6 +212,31 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) } /* + * inode->i_mutex: down + */ +int +ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int error; + int update_mode = 0; + umode_t mode = inode->i_mode; + + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + return error; + update_mode = 1; + } + error = __ext2_set_acl(inode, acl, type); + if (!error && update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + } + return error; +} + +/* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * * dir->i_mutex: down @@ -238,12 +253,12 @@ ext2_init_acl(struct inode *inode, struct inode *dir) return error; if (default_acl) { - error = ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = __ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!error) - error = ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = __ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); } return error; diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index a140c5e3dc54..b4b8438c42ef 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -211,7 +211,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { + if (acl && !ipage) { error = posix_acl_update_mode(inode, &inode->i_mode, &acl); if (error) return error; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 56bbf592e487..5b876f6d3f6b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -879,6 +879,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type) struct inode *inode; struct f2fs_inode_info *fi; bool is_dir = (type == DIR_INODE); + unsigned long ino = 0; trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir, get_pages(sbi, is_dir ? @@ -901,8 +902,17 @@ retry: inode = igrab(&fi->vfs_inode); spin_unlock(&sbi->inode_lock[type]); if (inode) { + unsigned long cur_ino = inode->i_ino; + filemap_fdatawrite(inode->i_mapping); iput(inode); + /* We need to give cpu to another writers. */ + if (ino == cur_ino) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + } else { + ino = cur_ino; + } } else { /* * We should submit bio, since it exists several diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a0e6d2c65a9e..2706130c261b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1538,7 +1538,6 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) { - inode_unlock(inode); ret = -EPERM; goto unlock_out; } @@ -1549,9 +1548,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); ret = -EPERM; - goto out; + goto unlock_out; } } @@ -1564,7 +1562,6 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) f2fs_mark_inode_dirty_sync(inode, false); unlock_out: inode_unlock(inode); -out: mnt_drop_write_file(filp); return ret; } diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 9adc202fcd6f..71191d89917d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -11,6 +11,7 @@ */ #include <linux/proc_fs.h> #include <linux/f2fs_fs.h> +#include <linux/seq_file.h> #include "f2fs.h" #include "segment.h" diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c index 9b92058a1240..6bb5d7c42888 100644 --- a/fs/hfsplus/posix_acl.c +++ b/fs/hfsplus/posix_acl.c @@ -51,8 +51,8 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) return acl; } -int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, - int type) +static int __hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, + int type) { int err; char *xattr_name; @@ -64,12 +64,6 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, switch (type) { case ACL_TYPE_ACCESS: xattr_name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - err = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (err) - return err; - } - err = 0; break; case ACL_TYPE_DEFAULT: @@ -105,6 +99,18 @@ end_set_acl: return err; } +int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int err; + + if (type == ACL_TYPE_ACCESS && acl) { + err = posix_acl_update_mode(inode, &inode->i_mode, &acl); + if (err) + return err; + } + return __hfsplus_set_posix_acl(inode, acl, type); +} + int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) { int err = 0; @@ -122,15 +128,15 @@ int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir) return err; if (default_acl) { - err = hfsplus_set_posix_acl(inode, default_acl, - ACL_TYPE_DEFAULT); + err = __hfsplus_set_posix_acl(inode, default_acl, + ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!err) - err = hfsplus_set_posix_acl(inode, acl, - ACL_TYPE_ACCESS); + err = __hfsplus_set_posix_acl(inode, acl, + ACL_TYPE_ACCESS); posix_acl_release(acl); } return err; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 8cf898a59730..217a5e7815da 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -410,7 +410,11 @@ static int parse_options(char *options, struct iso9660_options *popt) if (match_int(&args[0], &option)) return 0; n = option; - if (n > 99) + /* + * Track numbers are supposed to be in range 1-99, the + * mount option starts indexing at 0. + */ + if (n >= 99) return 0; popt->session = n + 1; break; @@ -543,7 +547,7 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) vol_desc_start=0; ms_info.addr_format=CDROM_LBA; - if(session >= 0 && session <= 99) { + if (session > 0) { struct cdrom_tocentry Te; Te.cdte_track=session; Te.cdte_format=CDROM_LBA; diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 7bc186f4ed4d..2e71b6e7e646 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -77,13 +77,6 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type, switch (type) { case ACL_TYPE_ACCESS: ea_name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - rc = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (rc) - return rc; - inode->i_ctime = current_time(inode); - mark_inode_dirty(inode); - } break; case ACL_TYPE_DEFAULT: ea_name = XATTR_NAME_POSIX_ACL_DEFAULT; @@ -115,12 +108,27 @@ int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int rc; tid_t tid; + int update_mode = 0; + umode_t mode = inode->i_mode; tid = txBegin(inode->i_sb, 0); mutex_lock(&JFS_IP(inode)->commit_mutex); + if (type == ACL_TYPE_ACCESS && acl) { + rc = posix_acl_update_mode(inode, &mode, &acl); + if (rc) + goto end_tx; + update_mode = 1; + } rc = __jfs_set_acl(tid, inode, type, acl); - if (!rc) + if (!rc) { + if (update_mode) { + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + mark_inode_dirty(inode); + } rc = txCommit(tid, 1, &inode, 0); + } +end_tx: txEnd(tid); mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index bd9b641ada2c..7ddcb445a3d9 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -98,7 +98,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) goto out; } - VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; + VolumeSize = i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits; if (VolumeSize) { if (newLVSize > VolumeSize) { @@ -211,7 +211,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) txQuiesce(sb); /* Reset size of direct inode */ - sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size; + sbi->direct_inode->i_size = i_size_read(sb->s_bdev->bd_inode); if (sbi->mntflag & JFS_INLINELOG) { /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index e8aad7d87b8c..78b41e1d5c67 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -313,7 +313,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, } case Opt_resize_nosize: { - *newLVSize = sb->s_bdev->bd_inode->i_size >> + *newLVSize = i_size_read(sb->s_bdev->bd_inode) >> sb->s_blocksize_bits; if (*newLVSize == 0) pr_err("JFS: Cannot determine volume size\n"); @@ -579,7 +579,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) goto out_unload; } inode->i_ino = 0; - inode->i_size = sb->s_bdev->bd_inode->i_size; + inode->i_size = i_size_read(sb->s_bdev->bd_inode); inode->i_mapping->a_ops = &jfs_metapage_aops; hlist_add_fake(&inode->i_hash); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 726b6cecf430..b995bdc13976 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -396,7 +396,7 @@ out_rqst: return error; } -static struct svc_serv_ops lockd_sv_ops = { +static const struct svc_serv_ops lockd_sv_ops = { .svo_shutdown = svc_rpcb_cleanup, .svo_enqueue_xprt = svc_xprt_do_enqueue, }; diff --git a/fs/mount.h b/fs/mount.h index de45d9e76748..6790767d1883 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -16,7 +16,7 @@ struct mnt_namespace { u64 event; unsigned int mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; -}; +} __randomize_layout; struct mnt_pcp { int mnt_count; @@ -69,7 +69,7 @@ struct mount { struct hlist_head mnt_pins; struct fs_pin mnt_umount; struct dentry *mnt_ex_mountpoint; -}; +} __randomize_layout; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ diff --git a/fs/namei.c b/fs/namei.c index 88fd38d1e3e7..ddb6a7c2b3d4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -524,7 +524,7 @@ struct nameidata { struct inode *link_inode; unsigned root_seq; int dfd; -}; +} __randomize_layout; static void set_nameidata(struct nameidata *p, int dfd, struct filename *name) { diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 34323877ec13..2cddf7f437e6 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -226,26 +226,26 @@ err_bind: return ret; } -static struct svc_serv_ops nfs40_cb_sv_ops = { +static const struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) -static struct svc_serv_ops nfs41_cb_sv_ops = { +static const struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; -static struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = &nfs41_cb_sv_ops, }; #else -static struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = NULL, }; @@ -254,8 +254,8 @@ static struct svc_serv_ops *nfs4_cb_sv_ops[] = { static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + const struct svc_serv_ops *sv_ops; struct svc_serv *serv; - struct svc_serv_ops *sv_ops; /* * Check whether we're already up and running. diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ee5ddbd36088..efebe6cf4378 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -820,6 +820,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->caps = source->caps; target->options = source->options; target->auth_info = source->auth_info; + target->port = source->port; } EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5ac484fe0dee..3522b1249019 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2372,16 +2372,40 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) } EXPORT_SYMBOL_GPL(nfs_access_add_cache); +#define NFS_MAY_READ (NFS4_ACCESS_READ) +#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \ + NFS4_ACCESS_EXTEND | \ + NFS4_ACCESS_DELETE) +#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \ + NFS4_ACCESS_EXTEND) +#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE +#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP) +#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE) +static int +nfs_access_calc_mask(u32 access_result, umode_t umode) +{ + int mask = 0; + + if (access_result & NFS_MAY_READ) + mask |= MAY_READ; + if (S_ISDIR(umode)) { + if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE) + mask |= MAY_WRITE; + if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP) + mask |= MAY_EXEC; + } else if (S_ISREG(umode)) { + if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE) + mask |= MAY_WRITE; + if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE) + mask |= MAY_EXEC; + } else if (access_result & NFS_MAY_WRITE) + mask |= MAY_WRITE; + return mask; +} + void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) { - entry->mask = 0; - if (access_result & NFS4_ACCESS_READ) - entry->mask |= MAY_READ; - if (access_result & - (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; + entry->mask = access_result; } EXPORT_SYMBOL_GPL(nfs_access_set_mask); @@ -2389,6 +2413,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; + int cache_mask; int status; trace_nfs_access_enter(inode); @@ -2404,7 +2429,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) goto out; /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE + | NFS_MAY_WRITE | NFS_MAY_READ; cache.cred = cred; cache.jiffies = jiffies; status = NFS_PROTO(inode)->access(inode, &cache); @@ -2418,7 +2444,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) } nfs_access_add_cache(inode, &cache); out_cached: - if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) + cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode); + if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: trace_nfs_access_exit(inode, status); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5713eb32a45e..af330c31f627 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -617,6 +617,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result) goto out; } + if (iocb->ki_pos > i_size_read(inode)) + nfs_revalidate_mapping(inode, file->f_mapping); nfs_start_io_write(inode); result = generic_write_checks(iocb, from); @@ -750,7 +752,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) */ nfs_sync_mapping(filp->f_mapping); if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) - nfs_zap_mapping(inode, filp->f_mapping); + nfs_zap_caches(inode); out: return status; } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 080fc6b278bd..44c638b7876c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -542,6 +542,10 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; + /* Is the deviceid already set? If so, we're good. */ + if (fl->dsaddr != NULL) + return 0; + /* find and reference the deviceid */ d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, lo->plh_lc_cred, gfp_flags); @@ -553,8 +557,6 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, if (filelayout_test_devid_unavailable(&dsaddr->id_node)) goto out_put; - fl->dsaddr = dsaddr; - if (fl->first_stripe_index >= dsaddr->stripe_count) { dprintk("%s Bad first_stripe_index %u\n", __func__, fl->first_stripe_index); @@ -570,6 +572,13 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, goto out_put; } status = 0; + + /* + * Atomic compare and xchange to ensure we don't scribble + * over a non-NULL pointer. + */ + if (cmpxchg(&fl->dsaddr, NULL, dsaddr) != NULL) + goto out_put; out: return status; out_put: diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1f2ac3dd0fe5..b0fa83a60754 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1842,6 +1842,10 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) int vers, ret; struct nfs_fh *fh; + if (!lseg || !(pnfs_is_valid_lseg(lseg) || + test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))) + goto out_err; + idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); if (!ds) diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 3efe946672be..60bad882c123 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -512,7 +512,7 @@ static const struct rpc_version mnt_version1 = { .counts = mnt_counts, }; -static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)]; +static unsigned int mnt3_counts[ARRAY_SIZE(mnt3_procedures)]; static const struct rpc_version mnt_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(mnt3_procedures), diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index df4a7d3ab915..d1e87ec0df84 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -220,15 +220,8 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, res.fattr); - if (status == 0) { - entry->mask = 0; - if (res.access & NFS3_ACCESS_READ) - entry->mask |= MAY_READ; - if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; - } + if (status == 0) + nfs_access_set_mask(entry, res.access); nfs_free_fattr(res.fattr); out: dprintk("NFS reply access: %d\n", status); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a0b4e1091340..18ca6879d8de 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2236,7 +2236,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred, int openflags) { struct nfs_access_entry cache; - u32 mask; + u32 mask, flags; /* access call failed or for some reason the server doesn't * support any access modes -- defer access call until later */ @@ -2250,16 +2250,20 @@ static int nfs4_opendata_access(struct rpc_cred *cred, */ if (openflags & __FMODE_EXEC) { /* ONLY check for exec rights */ - mask = MAY_EXEC; + if (S_ISDIR(state->inode->i_mode)) + mask = NFS4_ACCESS_LOOKUP; + else + mask = NFS4_ACCESS_EXECUTE; } else if ((fmode & FMODE_READ) && !opendata->file_created) - mask = MAY_READ; + mask = NFS4_ACCESS_READ; cache.cred = cred; cache.jiffies = jiffies; nfs_access_set_mask(&cache, opendata->o_res.access_result); nfs_access_add_cache(state->inode, &cache); - if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) + flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP; + if ((mask & ~cache.mask & flags) == 0) return 0; return -EACCES; @@ -6492,7 +6496,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&q->lock, flags); - freezable_schedule_timeout_interruptible(NFS4_LOCK_MAXTIMEOUT); + freezable_schedule_timeout(NFS4_LOCK_MAXTIMEOUT); } finish_wait(q, &wait); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index d40755a0984b..25f28fa64c57 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -159,13 +159,18 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, { struct pnfs_commit_bucket *b; struct pnfs_layout_segment *freeme; + int nwritten; int i; lockdep_assert_held(&cinfo->inode->i_lock); restart: for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { - if (pnfs_generic_transfer_commit_list(&b->written, dst, - cinfo, 0)) { + nwritten = pnfs_generic_transfer_commit_list(&b->written, + dst, cinfo, 0); + if (!nwritten) + continue; + cinfo->ds->nwritten -= nwritten; + if (list_empty(&b->written)) { freeme = b->wlseg; b->wlseg = NULL; spin_unlock(&cinfo->inode->i_lock); @@ -174,7 +179,6 @@ restart: goto restart; } } - cinfo->ds->nwritten = 0; } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); @@ -183,6 +187,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; struct pnfs_commit_bucket *bucket; struct pnfs_layout_segment *freeme; + struct list_head *pos; LIST_HEAD(pages); int i; @@ -193,6 +198,8 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) continue; freeme = bucket->clseg; bucket->clseg = NULL; + list_for_each(pos, &bucket->committing) + cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, &pages); spin_unlock(&cinfo->inode->i_lock); nfs_retry_commit(&pages, freeme, cinfo, i); @@ -217,13 +224,6 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { if (list_empty(&bucket->committing)) continue; - /* - * If the layout segment is invalid, then let - * pnfs_generic_retry_commit() clean up the bucket. - */ - if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) && - !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags)) - break; data = nfs_commitdata_alloc(false); if (!data) break; @@ -243,9 +243,12 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, struct nfs_commit_info *cinfo) { struct pnfs_commit_bucket *bucket; + struct list_head *pos; bucket = &cinfo->ds->buckets[data->ds_commit_index]; spin_lock(&cinfo->inode->i_lock); + list_for_each(pos, &bucket->committing) + cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, pages); data->lseg = bucket->clseg; bucket->clseg = NULL; @@ -330,7 +333,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, } } out: - cinfo->ds->ncommitting = 0; return PNFS_ATTEMPTED; } EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d27e75ad25e3..3c69db7d4905 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -784,6 +784,14 @@ out: return status; } + +static void +nfsd4_read_release(union nfsd4_op_u *u) +{ + if (u->read.rd_filp) + fput(u->read.rd_filp); +} + static __be32 nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -912,6 +920,13 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat return nfs_ok; } +static void +nfsd4_secinfo_release(union nfsd4_op_u *u) +{ + if (u->secinfo.si_exp) + exp_put(u->secinfo.si_exp); +} + static __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1335,6 +1350,12 @@ out: return nfserr; } +static void +nfsd4_getdeviceinfo_release(union nfsd4_op_u *u) +{ + kfree(u->getdeviceinfo.gd_device); +} + static __be32 nfsd4_layoutget(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1415,6 +1436,12 @@ out: return nfserr; } +static void +nfsd4_layoutget_release(union nfsd4_op_u *u) +{ + kfree(u->layoutget.lg_content); +} + static __be32 nfsd4_layoutcommit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1541,49 +1568,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum) nfsdstats.nfs4_opcount[opnum]++; } -enum nfsd4_op_flags { - ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ - ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ - ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ - /* For rfc 5661 section 2.6.3.1.1: */ - OP_HANDLES_WRONGSEC = 1 << 3, - OP_IS_PUTFH_LIKE = 1 << 4, - /* - * These are the ops whose result size we estimate before - * encoding, to avoid performing an op then not being able to - * respond or cache a response. This includes writes and setattrs - * as well as the operations usually called "nonidempotent": - */ - OP_MODIFIES_SOMETHING = 1 << 5, - /* - * Cache compounds containing these ops in the xid-based drc: - * We use the DRC for compounds containing non-idempotent - * operations, *except* those that are 4.1-specific (since - * sessions provide their own EOS), and except for stateful - * operations other than setclientid and setclientid_confirm - * (since sequence numbers provide EOS for open, lock, etc in - * the v4.0 case). - */ - OP_CACHEME = 1 << 6, - /* - * These are ops which clear current state id. - */ - OP_CLEAR_STATEID = 1 << 7, -}; - -struct nfsd4_operation { - __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *, - union nfsd4_op_u *); - u32 op_flags; - char *op_name; - /* Try to get response size before operation */ - u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); - void (*op_get_currentstateid)(struct nfsd4_compound_state *, - union nfsd4_op_u *); - void (*op_set_currentstateid)(struct nfsd4_compound_state *, - union nfsd4_op_u *); -}; - static const struct nfsd4_operation nfsd4_ops[]; static const char *nfsd4_op_name(unsigned opnum); @@ -1621,7 +1605,7 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) return nfs_ok; } -static inline const struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +const struct nfsd4_operation *OPDESC(struct nfsd4_op *op) { return &nfsd4_ops[op->opnum]; } @@ -1694,7 +1678,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd4_compoundargs *args = rqstp->rq_argp; struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_op *op; - const struct nfsd4_operation *opdesc; struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; @@ -1747,15 +1730,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } - opdesc = OPDESC(op); - if (!current_fh->fh_dentry) { - if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { + if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } } else if (current_fh->fh_export->ex_fslocs.migrated && - !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { + !(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; } @@ -1763,12 +1744,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) fh_clear_wcc(current_fh); /* If op is non-idempotent */ - if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { + if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { /* * Don't execute this op if we couldn't encode a * succesful reply: */ - u32 plen = opdesc->op_rsize_bop(rqstp, op); + u32 plen = op->opdesc->op_rsize_bop(rqstp, op); /* * Plus if there's another operation, make sure * we'll have space to at least encode an error: @@ -1781,9 +1762,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->status) goto encode_op; - if (opdesc->op_get_currentstateid) - opdesc->op_get_currentstateid(cstate, &op->u); - op->status = opdesc->op_func(rqstp, cstate, &op->u); + if (op->opdesc->op_get_currentstateid) + op->opdesc->op_get_currentstateid(cstate, &op->u); + op->status = op->opdesc->op_func(rqstp, cstate, &op->u); /* Only from SEQUENCE */ if (cstate->status == nfserr_replay_cache) { @@ -1792,10 +1773,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto out; } if (!op->status) { - if (opdesc->op_set_currentstateid) - opdesc->op_set_currentstateid(cstate, &op->u); + if (op->opdesc->op_set_currentstateid) + op->opdesc->op_set_currentstateid(cstate, &op->u); - if (opdesc->op_flags & OP_CLEAR_STATEID) + if (op->opdesc->op_flags & OP_CLEAR_STATEID) clear_current_stateid(cstate); if (need_wrongsec_check(rqstp)) @@ -2160,13 +2141,15 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOCK] = { .op_func = nfsd4_lock, - .op_flags = OP_MODIFIES_SOMETHING, + .op_flags = OP_MODIFIES_SOMETHING | + OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_LOCK", .op_rsize_bop = nfsd4_lock_rsize, .op_set_currentstateid = nfsd4_set_lockstateid, }, [OP_LOCKT] = { .op_func = nfsd4_lockt, + .op_flags = OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_LOCKT", .op_rsize_bop = nfsd4_lock_rsize, }, @@ -2238,6 +2221,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_READ] = { .op_func = nfsd4_read, + .op_release = nfsd4_read_release, .op_name = "OP_READ", .op_rsize_bop = nfsd4_read_rsize, .op_get_currentstateid = nfsd4_get_readstateid, @@ -2287,6 +2271,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO] = { .op_func = nfsd4_secinfo, + .op_release = nfsd4_secinfo_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO", .op_rsize_bop = nfsd4_secinfo_rsize, @@ -2294,14 +2279,16 @@ static const struct nfsd4_operation nfsd4_ops[] = { [OP_SETATTR] = { .op_func = nfsd4_setattr, .op_name = "OP_SETATTR", - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME + | OP_NONTRIVIAL_ERROR_ENCODE, .op_rsize_bop = nfsd4_setattr_rsize, .op_get_currentstateid = nfsd4_get_setattrstateid, }, [OP_SETCLIENTID] = { .op_func = nfsd4_setclientid, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_MODIFIES_SOMETHING | OP_CACHEME, + | OP_MODIFIES_SOMETHING | OP_CACHEME + | OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_SETCLIENTID", .op_rsize_bop = nfsd4_setclientid_rsize, }, @@ -2388,6 +2375,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = nfsd4_secinfo_no_name, + .op_release = nfsd4_secinfo_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", .op_rsize_bop = nfsd4_secinfo_rsize, @@ -2408,12 +2396,14 @@ static const struct nfsd4_operation nfsd4_ops[] = { #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = { .op_func = nfsd4_getdeviceinfo, + .op_release = nfsd4_getdeviceinfo_release, .op_flags = ALLOWED_WITHOUT_FH, .op_name = "OP_GETDEVICEINFO", .op_rsize_bop = nfsd4_getdeviceinfo_rsize, }, [OP_LAYOUTGET] = { .op_func = nfsd4_layoutget, + .op_release = nfsd4_layoutget_release, .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LAYOUTGET", .op_rsize_bop = nfsd4_layoutget_rsize, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5f940d2a136b..08691fe394b2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1778,7 +1778,7 @@ nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); -static nfsd4_dec nfsd4_dec_ops[] = { +static const nfsd4_dec nfsd4_dec_ops[] = { [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, @@ -1927,6 +1927,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } + op->opdesc = OPDESC(op); /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: @@ -3102,14 +3103,12 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 8); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(access->ac_supported); - *p++ = cpu_to_be32(access->ac_resp_access); - } - return nfserr; + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(access->ac_supported); + *p++ = cpu_to_be32(access->ac_resp_access); + return 0; } static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) @@ -3117,17 +3116,15 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, - NFS4_MAX_SESSIONID_LEN); - *p++ = cpu_to_be32(bcts->dir); - /* Upshifting from TCP to RDMA is not supported */ - *p++ = cpu_to_be32(0); - } - return nfserr; + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(bcts->dir); + /* Upshifting from TCP to RDMA is not supported */ + *p++ = cpu_to_be32(0); + return 0; } static __be32 @@ -3135,10 +3132,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &close->cl_stateid); } @@ -3148,14 +3142,12 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, commit->co_verf.data, + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, commit->co_verf.data, NFS4_VERIFIER_SIZE); - } - return nfserr; + return 0; } static __be32 @@ -3164,15 +3156,13 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - encode_cinfo(p, &create->cr_cinfo); - nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], - create->cr_bmval[1], create->cr_bmval[2]); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + encode_cinfo(p, &create->cr_cinfo); + nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], + create->cr_bmval[1], create->cr_bmval[2]); + return 0; } static __be32 @@ -3181,13 +3171,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = &resp->xdr; - if (nfserr) - return nfserr; - - nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, - getattr->ga_bmval, - resp->rqstp, 0); - return nfserr; + return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, + getattr->ga_bmval, resp->rqstp, 0); } static __be32 @@ -3198,14 +3183,12 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh unsigned int len; __be32 *p; - if (!nfserr) { - len = fhp->fh_handle.fh_size; - p = xdr_reserve_space(xdr, len + 4); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); - } - return nfserr; + len = fhp->fh_handle.fh_size; + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); + return 0; } /* @@ -3275,10 +3258,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &locku->lu_stateid); } @@ -3288,13 +3268,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &link->li_cinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &link->li_cinfo); + return 0; } @@ -3304,12 +3282,9 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - goto out; - nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); if (nfserr) - goto out; + return nfserr; p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; @@ -3319,7 +3294,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1], open->op_bmval[2]); if (nfserr) - goto out; + return nfserr; p = xdr_reserve_space(xdr, 4); if (!p) @@ -3392,8 +3367,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op BUG(); } /* XXX save filehandle here */ -out: - return nfserr; + return 0; } static __be32 @@ -3401,10 +3375,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 @@ -3412,10 +3383,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &od->od_stateid); } static __be32 nfsd4_encode_splice_read( @@ -3552,20 +3520,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct raparms *ra = NULL; __be32 *p; - if (nfserr) - goto out; - p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); - nfserr = nfserr_resource; - goto out; + return nfserr_resource; } if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); - nfserr = nfserr_resource; - goto out; + return nfserr_resource; } xdr_commit_encode(xdr); @@ -3589,9 +3552,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) xdr_truncate_encode(xdr, starting_len); -out: - if (file) - fput(file); return nfserr; } @@ -3605,9 +3565,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd int length_offset = xdr->buf->len; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; @@ -3651,9 +3608,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int starting_len = xdr->buf->len; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; @@ -3739,13 +3693,11 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &remove->rm_cinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &remove->rm_cinfo); + return 0; } static __be32 @@ -3754,19 +3706,16 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 40); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &rename->rn_sinfo); - p = encode_cinfo(p, &rename->rn_tinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &rename->rn_sinfo); + p = encode_cinfo(p, &rename->rn_tinfo); + return 0; } static __be32 -nfsd4_do_encode_secinfo(struct xdr_stream *xdr, - __be32 nfserr, struct svc_export *exp) +nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) { u32 i, nflavs, supported; struct exp_flavor_info *flavs; @@ -3774,9 +3723,6 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, __be32 *p, *flavorsp; static bool report = true; - if (nfserr) - goto out; - nfserr = nfserr_resource; if (exp->ex_nflavors) { flavs = exp->ex_flavors; nflavs = exp->ex_nflavors; @@ -3800,7 +3746,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, supported = 0; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; flavorsp = p++; /* to be backfilled later */ for (i = 0; i < nflavs; i++) { @@ -3812,7 +3758,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4 + 4 + XDR_LEN(info.oid.len) + 4 + 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(RPC_AUTH_GSS); p = xdr_encode_opaque(p, info.oid.data, info.oid.len); *p++ = cpu_to_be32(info.qop); @@ -3821,7 +3767,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, supported++; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(pf); } else { if (report) @@ -3833,11 +3779,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, if (nflavs != supported) report = false; *flavorsp = htonl(supported); - nfserr = 0; -out: - if (exp) - exp_put(exp); - return nfserr; + return 0; } static __be32 @@ -3846,7 +3788,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, { struct xdr_stream *xdr = &resp->xdr; - return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp); + return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); } static __be32 @@ -3855,7 +3797,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, { struct xdr_stream *xdr = &resp->xdr; - return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp); + return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); } /* @@ -3916,16 +3858,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 16); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(write->wr_bytes_written); - *p++ = cpu_to_be32(write->wr_how_written); - p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, - NFS4_VERIFIER_SIZE); - } - return nfserr; + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(write->wr_bytes_written); + *p++ = cpu_to_be32(write->wr_how_written); + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, + NFS4_VERIFIER_SIZE); + return 0; } static __be32 @@ -3938,12 +3878,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, char *server_scope; int major_id_sz; int server_scope_sz; - int status = 0; uint64_t minor_id = 0; - if (nfserr) - return nfserr; - major_id = utsname()->nodename; major_id_sz = strlen(major_id); server_scope = utsname()->nodename; @@ -3968,19 +3904,19 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, break; case SP4_MACH_CRED: /* spo_must_enforce bitmap: */ - status = nfsd4_encode_bitmap(xdr, + nfserr = nfsd4_encode_bitmap(xdr, exid->spo_must_enforce[0], exid->spo_must_enforce[1], exid->spo_must_enforce[2]); - if (status) - goto out; + if (nfserr) + return nfserr; /* spo_must_allow bitmap: */ - status = nfsd4_encode_bitmap(xdr, + nfserr = nfsd4_encode_bitmap(xdr, exid->spo_must_allow[0], exid->spo_must_allow[1], exid->spo_must_allow[2]); - if (status) - goto out; + if (nfserr) + return nfserr; break; default: WARN_ON_ONCE(1); @@ -4007,8 +3943,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* Implementation id */ *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ return 0; -out: - return status; } static __be32 @@ -4018,9 +3952,6 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; @@ -4074,9 +4005,6 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); if (!p) return nfserr_resource; @@ -4101,9 +4029,6 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); if (!p) return nfserr_resource; @@ -4113,7 +4038,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = stateid->ts_id_status; } - return nfserr; + return 0; } #ifdef CONFIG_NFSD_PNFS @@ -4126,14 +4051,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, u32 starting_len = xdr->buf->len, needed_len; __be32 *p; - dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr)); - if (nfserr) - goto out; - - nfserr = nfserr_resource; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(gdev->gd_layout_type); @@ -4149,42 +4069,33 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, */ if (xdr->buf->len + 4 > gdev->gd_maxcount) goto toosmall; - goto out; + return nfserr; } } - nfserr = nfserr_resource; if (gdev->gd_notify_types) { p = xdr_reserve_space(xdr, 4 + 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(1); /* bitmap length */ *p++ = cpu_to_be32(gdev->gd_notify_types); } else { p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; *p++ = 0; } - nfserr = 0; -out: - kfree(gdev->gd_device); - dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr)); - return nfserr; - + return 0; toosmall: dprintk("%s: maxcount too small\n", __func__); needed_len = xdr->buf->len + 4 /* notifications */; xdr_truncate_encode(xdr, starting_len); p = xdr_reserve_space(xdr, 4); - if (!p) { - nfserr = nfserr_resource; - } else { - *p++ = cpu_to_be32(needed_len); - nfserr = nfserr_toosmall; - } - goto out; + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(needed_len); + return nfserr_toosmall; } static __be32 @@ -4195,14 +4106,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, const struct nfsd4_layout_ops *ops; __be32 *p; - dprintk("%s: err %d\n", __func__, nfserr); - if (nfserr) - goto out; - - nfserr = nfserr_resource; p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t)); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(1); /* we always set return-on-close */ *p++ = cpu_to_be32(lgp->lg_sid.si_generation); @@ -4216,10 +4122,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(lgp->lg_layout_type); ops = nfsd4_layout_ops[lgp->lg_layout_type]; - nfserr = ops->encode_layoutget(xdr, lgp); -out: - kfree(lgp->lg_content); - return nfserr; + return ops->encode_layoutget(xdr, lgp); } static __be32 @@ -4229,9 +4132,6 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; @@ -4243,7 +4143,7 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_encode_hyper(p, lcp->lc_newsize); } - return nfs_ok; + return 0; } static __be32 @@ -4253,16 +4153,13 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; *p++ = cpu_to_be32(lrp->lrs_present); if (lrp->lrs_present) return nfsd4_encode_stateid(xdr, &lrp->lr_sid); - return nfs_ok; + return 0; } #endif /* CONFIG_NFSD_PNFS */ @@ -4289,16 +4186,14 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - if (!nfserr) { - nfserr = nfsd42_encode_write_res(resp, ©->cp_res); - if (nfserr) - return nfserr; + nfserr = nfsd42_encode_write_res(resp, ©->cp_res); + if (nfserr) + return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 4); - *p++ = cpu_to_be32(copy->cp_consecutive); - *p++ = cpu_to_be32(copy->cp_synchronous); - } - return nfserr; + p = xdr_reserve_space(&resp->xdr, 4 + 4); + *p++ = cpu_to_be32(copy->cp_consecutive); + *p++ = cpu_to_be32(copy->cp_synchronous); + return 0; } static __be32 @@ -4307,14 +4202,11 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 8); *p++ = cpu_to_be32(seek->seek_eof); p = xdr_encode_hyper(p, seek->seek_pos); - return nfserr; + return 0; } static __be32 @@ -4330,7 +4222,7 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); * since we don't need to filter out obsolete ops as this is * done in the decoding phase. */ -static nfsd4_enc nfsd4_enc_ops[] = { +static const nfsd4_enc nfsd4_enc_ops[] = { [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, @@ -4449,6 +4341,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) struct xdr_stream *xdr = &resp->xdr; struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; + const struct nfsd4_operation *opdesc = op->opdesc; int post_err_offset; nfsd4_enc encoder; __be32 *p; @@ -4463,10 +4356,15 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) if (op->opnum == OP_ILLEGAL) goto status; + if (op->status && opdesc && + !(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE)) + goto status; BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); + if (opdesc && opdesc->op_release) + opdesc->op_release(&op->u); xdr_commit_encode(xdr); /* nfsd4_check_resp_size guarantees enough room for error status */ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 063ae7de2c12..7e3af3ef0917 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -475,7 +475,7 @@ static int nfsd_get_default_max_blksize(void) return ret; } -static struct svc_serv_ops nfsd_thread_sv_ops = { +static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, .svo_enqueue_xprt = svc_xprt_do_enqueue, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 72c6ad136107..f8a0b6549a88 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -538,6 +538,7 @@ struct nfsd4_seek { struct nfsd4_op { int opnum; + const struct nfsd4_operation * opdesc; __be32 status; union nfsd4_op_u { struct nfsd4_access access; @@ -661,6 +662,7 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +const struct nfsd4_operation *OPDESC(struct nfsd4_op *op); int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); void warn_on_nonidempotent_op(struct nfsd4_op *op); @@ -748,6 +750,53 @@ extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *); extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); +enum nfsd4_op_flags { + ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ + ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ + ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ + /* For rfc 5661 section 2.6.3.1.1: */ + OP_HANDLES_WRONGSEC = 1 << 3, + OP_IS_PUTFH_LIKE = 1 << 4, + /* + * These are the ops whose result size we estimate before + * encoding, to avoid performing an op then not being able to + * respond or cache a response. This includes writes and setattrs + * as well as the operations usually called "nonidempotent": + */ + OP_MODIFIES_SOMETHING = 1 << 5, + /* + * Cache compounds containing these ops in the xid-based drc: + * We use the DRC for compounds containing non-idempotent + * operations, *except* those that are 4.1-specific (since + * sessions provide their own EOS), and except for stateful + * operations other than setclientid and setclientid_confirm + * (since sequence numbers provide EOS for open, lock, etc in + * the v4.0 case). + */ + OP_CACHEME = 1 << 6, + /* + * These are ops which clear current state id. + */ + OP_CLEAR_STATEID = 1 << 7, + /* Most ops return only an error on failure; some may do more: */ + OP_NONTRIVIAL_ERROR_ENCODE = 1 << 8, +}; + +struct nfsd4_operation { + __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *, + union nfsd4_op_u *); + void (*op_release)(union nfsd4_op_u *); + u32 op_flags; + char *op_name; + /* Try to get response size before operation */ + u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); + void (*op_get_currentstateid)(struct nfsd4_compound_state *, + union nfsd4_op_u *); + void (*op_set_currentstateid)(struct nfsd4_compound_state *, + union nfsd4_op_u *); +}; + + #endif /* diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 641d9ee97f91..48b70e6490f3 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -481,17 +481,30 @@ out_cleanup: } static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, - struct cattr *attr, struct dentry *hardlink) + struct cattr *attr, struct dentry *hardlink, + bool origin) { int err; const struct cred *old_cred; struct cred *override_cred; + struct dentry *parent = dentry->d_parent; - err = ovl_copy_up(dentry->d_parent); + err = ovl_copy_up(parent); if (err) return err; old_cred = ovl_override_creds(dentry->d_sb); + + /* + * When linking a file with copy up origin into a new parent, mark the + * new parent dir "impure". + */ + if (origin) { + err = ovl_set_impure(parent, ovl_dentry_upper(parent)); + if (err) + goto out_revert_creds; + } + err = -ENOMEM; override_cred = prepare_creds(); if (override_cred) { @@ -550,7 +563,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, inode_init_owner(inode, dentry->d_parent->d_inode, mode); attr.mode = inode->i_mode; - err = ovl_create_or_link(dentry, inode, &attr, NULL); + err = ovl_create_or_link(dentry, inode, &attr, NULL, false); if (err) iput(inode); @@ -609,7 +622,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir, inode = d_inode(old); ihold(inode); - err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old)); + err = ovl_create_or_link(new, inode, NULL, ovl_dentry_upper(old), + ovl_type_origin(old)); if (err) iput(inode); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 69f4fc26ee39..5bc71642b226 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -202,37 +202,38 @@ bool ovl_is_private_xattr(const char *name) sizeof(OVL_XATTR_PREFIX) - 1) == 0; } -int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) +int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags) { int err; - struct path realpath; - enum ovl_path_type type = ovl_path_real(dentry, &realpath); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); + struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); const struct cred *old_cred; err = ovl_want_write(dentry); if (err) goto out; - if (!value && !OVL_TYPE_UPPER(type)) { - err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (!value && !upperdentry) { + err = vfs_getxattr(realdentry, name, NULL, 0); if (err < 0) goto out_drop_write; } - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; + if (!upperdentry) { + err = ovl_copy_up(dentry); + if (err) + goto out_drop_write; - if (!OVL_TYPE_UPPER(type)) - ovl_path_upper(dentry, &realpath); + realdentry = ovl_dentry_upper(dentry); + } old_cred = ovl_override_creds(dentry->d_sb); if (value) - err = vfs_setxattr(realpath.dentry, name, value, size, flags); + err = vfs_setxattr(realdentry, name, value, size, flags); else { WARN_ON(flags != XATTR_REPLACE); - err = vfs_removexattr(realpath.dentry, name); + err = vfs_removexattr(realdentry, name); } revert_creds(old_cred); @@ -242,12 +243,13 @@ out: return err; } -int ovl_xattr_get(struct dentry *dentry, const char *name, +int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { - struct dentry *realdentry = ovl_dentry_real(dentry); ssize_t res; const struct cred *old_cred; + struct dentry *realdentry = + ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); old_cred = ovl_override_creds(dentry->d_sb); res = vfs_getxattr(realdentry, name, value, size); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 9bc0e580a5b3..8aef2b304b2d 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -397,8 +397,19 @@ int ovl_verify_index(struct dentry *index, struct path *lowerstack, if (!d_inode(index)) return 0; - err = -EISDIR; - if (d_is_dir(index)) + /* + * Directory index entries are going to be used for looking up + * redirected upper dirs by lower dir fh when decoding an overlay + * file handle of a merge dir. Whiteout index entries are going to be + * used as an indication that an exported overlay file handle should + * be treated as stale (i.e. after unlink of the overlay inode). + * We don't know the verification rules for directory and whiteout + * index entries, because they have not been implemented yet, so return + * EROFS if those entries are found to avoid corrupting an index that + * was created by a newer kernel. + */ + err = -EROFS; + if (d_is_dir(index) || ovl_is_whiteout(index)) goto fail; err = -EINVAL; @@ -436,8 +447,8 @@ out: return err; fail: - pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, err=%i)\n", - index, err); + pr_warn_ratelimited("overlayfs: failed to verify index (%pd2, ftype=%x, err=%i)\n", + index, d_inode(index)->i_mode & S_IFMT, err); goto out; } @@ -502,6 +513,7 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, goto out; } + inode = d_inode(index); if (d_is_negative(index)) { if (upper && d_inode(origin)->i_nlink > 1) { pr_warn_ratelimited("overlayfs: hard link with origin but no index (ino=%lu).\n", @@ -511,11 +523,22 @@ static struct dentry *ovl_lookup_index(struct dentry *dentry, dput(index); index = NULL; - } else if (upper && d_inode(index) != d_inode(upper)) { - inode = d_inode(index); - pr_warn_ratelimited("overlayfs: wrong index found (index ino: %lu, upper ino: %lu).\n", - d_inode(index)->i_ino, - d_inode(upper)->i_ino); + } else if (upper && d_inode(upper) != inode) { + pr_warn_ratelimited("overlayfs: wrong index found (index=%pd2, ino=%lu, upper ino=%lu).\n", + index, inode->i_ino, d_inode(upper)->i_ino); + goto fail; + } else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) || + ((inode->i_mode ^ d_inode(origin)->i_mode) & S_IFMT)) { + /* + * Index should always be of the same file type as origin + * except for the case of a whiteout index. A whiteout + * index should only exist if all lower aliases have been + * unlinked, which means that finding a lower origin on lookup + * whose index is a whiteout should be treated as an error. + */ + pr_warn_ratelimited("overlayfs: bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n", + index, d_inode(index)->i_mode & S_IFMT, + d_inode(origin)->i_mode & S_IFMT); goto fail; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 60d26605e039..e927a62c97ae 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -47,7 +47,8 @@ enum ovl_flag { /* Is the real inode encoded in fid an upper inode? */ #define OVL_FH_FLAG_PATH_UPPER (1 << 2) -#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN) +#define OVL_FH_FLAG_ALL (OVL_FH_FLAG_BIG_ENDIAN | OVL_FH_FLAG_ANY_ENDIAN | \ + OVL_FH_FLAG_PATH_UPPER) #if defined(__LITTLE_ENDIAN) #define OVL_FH_FLAG_CPU_ENDIAN 0 @@ -199,6 +200,7 @@ enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); struct dentry *ovl_dentry_lower(struct dentry *dentry); struct dentry *ovl_dentry_real(struct dentry *dentry); +struct dentry *ovl_i_dentry_upper(struct inode *inode); struct inode *ovl_inode_upper(struct inode *inode); struct inode *ovl_inode_lower(struct inode *inode); struct inode *ovl_inode_real(struct inode *inode); @@ -270,9 +272,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); int ovl_permission(struct inode *inode, int mask); -int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags); -int ovl_xattr_get(struct dentry *dentry, const char *name, +int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, + const void *value, size_t size, int flags); +int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 0298463cf9c3..3d424a51cabb 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -703,7 +703,10 @@ int ovl_indexdir_cleanup(struct dentry *dentry, struct vfsmount *mnt, err = PTR_ERR(index); break; } - if (ovl_verify_index(index, lowerstack, numlower)) { + err = ovl_verify_index(index, lowerstack, numlower); + if (err) { + if (err == -EROFS) + break; err = ovl_cleanup(dir, index); if (err) break; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 44dc2d6ffe0f..d86e89f97201 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -692,7 +692,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return ovl_xattr_get(dentry, handler->name, buffer, size); + return ovl_xattr_get(dentry, inode, handler->name, buffer, size); } static int __maybe_unused @@ -742,7 +742,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, return err; } - err = ovl_xattr_set(dentry, handler->name, value, size, flags); + err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); if (!err) ovl_copyattr(ovl_inode_real(inode), inode); @@ -772,7 +772,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size) { - return ovl_xattr_get(dentry, name, buffer, size); + return ovl_xattr_get(dentry, inode, name, buffer, size); } static int ovl_other_xattr_set(const struct xattr_handler *handler, @@ -780,7 +780,7 @@ static int ovl_other_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { - return ovl_xattr_set(dentry, name, value, size, flags); + return ovl_xattr_set(dentry, inode, name, value, size, flags); } static const struct xattr_handler __maybe_unused @@ -1058,10 +1058,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, OVL_INDEXDIR_NAME, true); - err = PTR_ERR(ufs->indexdir); - if (IS_ERR(ufs->indexdir)) - goto out_put_lower_mnt; - if (ufs->indexdir) { /* Verify upper root is index dir origin */ err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, @@ -1090,6 +1086,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) else sb->s_d_op = &ovl_dentry_operations; + err = -ENOMEM; ufs->creator_cred = cred = prepare_creds(); if (!cred) goto out_put_indexdir; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index c492ba75c659..f46ad75dc96a 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -157,9 +157,14 @@ struct dentry *ovl_dentry_real(struct dentry *dentry) return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry); } +struct dentry *ovl_i_dentry_upper(struct inode *inode) +{ + return ovl_upperdentry_dereference(OVL_I(inode)); +} + struct inode *ovl_inode_upper(struct inode *inode) { - struct dentry *upperdentry = ovl_upperdentry_dereference(OVL_I(inode)); + struct dentry *upperdentry = ovl_i_dentry_upper(inode); return upperdentry ? d_inode(upperdentry) : NULL; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 18694598bebf..aa2b89071630 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -51,7 +51,7 @@ struct proc_dir_entry { spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ u8 namelen; char name[]; -}; +} __randomize_layout; union proc_op { int (*proc_get_link)(struct dentry *, struct path *); @@ -70,7 +70,7 @@ struct proc_inode { struct hlist_node sysctl_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; -}; +} __randomize_layout; /* * General functions @@ -279,7 +279,7 @@ struct proc_maps_private { #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif -}; +} __randomize_layout; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 3d2256a425ee..54415f0e3d18 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -23,7 +23,8 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) struct reiserfs_transaction_handle th; size_t jcreate_blocks; int size = acl ? posix_acl_xattr_size(acl->a_count) : 0; - + int update_mode = 0; + umode_t mode = inode->i_mode; /* * Pessimism: We can't assume that anything from the xattr root up @@ -37,7 +38,16 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) error = journal_begin(&th, inode->i_sb, jcreate_blocks); reiserfs_write_unlock(inode->i_sb); if (error == 0) { + if (type == ACL_TYPE_ACCESS && acl) { + error = posix_acl_update_mode(inode, &mode, &acl); + if (error) + goto unlock; + update_mode = 1; + } error = __reiserfs_set_acl(&th, inode, type, acl); + if (!error && update_mode) + inode->i_mode = mode; +unlock: reiserfs_write_lock(inode->i_sb); error2 = journal_end(&th); reiserfs_write_unlock(inode->i_sb); @@ -241,11 +251,6 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_update_mode(inode, &inode->i_mode, &acl); - if (error) - return error; - } break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0a9880777c9c..c09c16b1ad3b 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5435,6 +5435,7 @@ __xfs_bunmapi( xfs_fsblock_t sum; xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_fileoff_t max_len; + xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_); @@ -5534,6 +5535,17 @@ __xfs_bunmapi( */ del = got; wasdel = isnullstartblock(del.br_startblock); + + /* + * Make sure we don't touch multiple AGF headers out of order + * in a single transaction, as that could cause AB-BA deadlocks. + */ + if (!wasdel) { + agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); + if (prev_agno != NULLAGNUMBER && prev_agno > agno) + break; + prev_agno = agno; + } if (got.br_startoff < start) { del.br_startoff = start; del.br_blockcount -= start - got.br_startoff; @@ -6499,6 +6511,15 @@ xfs_bmap_finish_one( xfs_fsblock_t firstfsb; int error = 0; + /* + * firstfsb is tied to the transaction lifetime and is used to + * ensure correct AG locking order and schedule work item + * continuations. XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us + * to only making one bmap call per transaction, so it should + * be safe to have it as a local variable here. + */ + firstfsb = NULLFSBLOCK; + trace_xfs_bmap_deferred(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4da85fff69ad..e0bcc4a59efd 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -728,7 +728,8 @@ xfs_btree_firstrec( * Get the block pointer for this level. */ block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); + if (xfs_btree_check_block(cur, block, level, bp)) + return 0; /* * It's empty, there is no such record. */ @@ -757,7 +758,8 @@ xfs_btree_lastrec( * Get the block pointer for this level. */ block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); + if (xfs_btree_check_block(cur, block, level, bp)) + return 0; /* * It's empty, there is no such record. */ diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index d478065b9544..8727a43115ef 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -136,6 +136,8 @@ __xfs_dir3_data_check( */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= + p + be16_to_cpu(dup->length)); XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == (char *)dup - (char *)hdr); @@ -164,6 +166,8 @@ __xfs_dir3_data_check( XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0); XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= + p + ops->data_entsize(dep->namelen)); XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(*ops->data_entry_tag_p(dep)) == (char *)dep - (char *)hdr); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 900ea231f9a3..45b1c3b4e047 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1638,6 +1638,10 @@ xfs_refcount_recover_cow_leftovers( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) goto out_trans; + if (!agbp) { + error = -ENOMEM; + goto out_trans; + } cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); /* Find all the leftover CoW staging extents. */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 6ce948c436d5..15751dc2a27d 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -111,6 +111,9 @@ restart: skipped = 0; break; } + /* we're done if id overflows back to zero */ + if (!next_index) + break; } if (skipped) { diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index ab2270a87196..f45fbf0db9bb 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -170,6 +170,8 @@ xfs_reflink_find_shared( error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (error) return error; + if (!agbp) + return -ENOMEM; cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); @@ -329,7 +331,7 @@ xfs_reflink_convert_cow_extent( xfs_filblks_t count_fsb, struct xfs_defer_ops *dfops) { - xfs_fsblock_t first_block; + xfs_fsblock_t first_block = NULLFSBLOCK; int nimaps = 1; if (imap->br_state == XFS_EXT_NORM) |