diff options
Diffstat (limited to 'fs')
140 files changed, 10034 insertions, 3771 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 7b623e9fc1b0..708ba336e689 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig" source "fs/btrfs/Kconfig" source "fs/nilfs2/Kconfig" source "fs/f2fs/Kconfig" +source "fs/zonefs/Kconfig" config FS_DAX bool "Direct Access (DAX) support" @@ -264,6 +265,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/erofs/Kconfig" +source "fs/vboxsf/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index 98be354fdb61..505e51166973 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -133,3 +133,5 @@ obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ obj-$(CONFIG_EROFS_FS) += erofs/ +obj-$(CONFIG_VBOXSF_FS) += vboxsf/ +obj-$(CONFIG_ZONEFS_FS) += zonefs/ diff --git a/fs/afs/super.c b/fs/afs/super.c index 7f8a9b3137bf..dda7a9a66848 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -38,13 +38,13 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); static int afs_show_devname(struct seq_file *m, struct dentry *root); static int afs_show_options(struct seq_file *m, struct dentry *root); static int afs_init_fs_context(struct fs_context *fc); -static const struct fs_parameter_description afs_fs_parameters; +static const struct fs_parameter_spec afs_fs_parameters[]; struct file_system_type afs_fs_type = { .owner = THIS_MODULE, .name = "afs", .init_fs_context = afs_init_fs_context, - .parameters = &afs_fs_parameters, + .parameters = afs_fs_parameters, .kill_sb = afs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE, }; @@ -73,28 +73,22 @@ enum afs_param { Opt_source, }; -static const struct fs_parameter_spec afs_param_specs[] = { - fsparam_flag ("autocell", Opt_autocell), - fsparam_flag ("dyn", Opt_dyn), - fsparam_enum ("flock", Opt_flock), - fsparam_string("source", Opt_source), +static const struct constant_table afs_param_flock[] = { + {"local", afs_flock_mode_local }, + {"openafs", afs_flock_mode_openafs }, + {"strict", afs_flock_mode_strict }, + {"write", afs_flock_mode_write }, {} }; -static const struct fs_parameter_enum afs_param_enums[] = { - { Opt_flock, "local", afs_flock_mode_local }, - { Opt_flock, "openafs", afs_flock_mode_openafs }, - { Opt_flock, "strict", afs_flock_mode_strict }, - { Opt_flock, "write", afs_flock_mode_write }, +static const struct fs_parameter_spec afs_fs_parameters[] = { + fsparam_flag ("autocell", Opt_autocell), + fsparam_flag ("dyn", Opt_dyn), + fsparam_enum ("flock", Opt_flock, afs_param_flock), + fsparam_string("source", Opt_source), {} }; -static const struct fs_parameter_description afs_fs_parameters = { - .name = "kAFS", - .specs = afs_param_specs, - .enums = afs_param_enums, -}; - /* * initialise the filesystem */ @@ -323,7 +317,7 @@ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param) struct afs_fs_context *ctx = fc->fs_private; int opt; - opt = fs_parse(fc, &afs_fs_parameters, param, &result); + opt = fs_parse(fc, afs_fs_parameters, param, &result); if (opt < 0) return opt; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 44a3ce1e4ce4..1dc97f2d6201 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -396,7 +396,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, struct cachefiles_object *object; struct cachefiles_cache *cache; struct inode *inode; - sector_t block0, block; + sector_t block; unsigned shift; int ret; @@ -412,7 +412,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->bmap); ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ @@ -428,12 +427,14 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, * enough for this as it doesn't indicate errors, but it's all we've * got for the moment */ - block0 = page->index; - block0 <<= shift; + block = page->index; + block <<= shift; + + ret = bmap(inode, &block); + ASSERT(ret < 0); - block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); _debug("%llx -> %llx", - (unsigned long long) block0, + (unsigned long long) (page->index << shift), (unsigned long long) block); if (block) { @@ -711,7 +712,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->bmap); ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ @@ -728,7 +728,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, ret = space ? -ENODATA : -ENOBUFS; list_for_each_entry_safe(page, _n, pages, lru) { - sector_t block0, block; + sector_t block; /* we assume the absence or presence of the first block is a * good enough indication for the page as a whole @@ -736,13 +736,14 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, * good enough for this as it doesn't indicate errors, but * it's all we've got for the moment */ - block0 = page->index; - block0 <<= shift; + block = page->index; + block <<= shift; + + ret = bmap(inode, &block); + ASSERT(!ret); - block = inode->i_mapping->a_ops->bmap(inode->i_mapping, - block0); _debug("%llx -> %llx", - (unsigned long long) block0, + (unsigned long long) (page->index << shift), (unsigned long long) block); if (block) { diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 73f24f307a4a..270b769607a2 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -67,7 +67,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc) if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len)) continue; - errorf(fc, "ceph: fscache cookie already registered for fsid %pU, use fsc=<uniquifier> option", + errorfc(fc, "fscache cookie already registered for fsid %pU, use fsc=<uniquifier> option", fsid); err = -EBUSY; goto out_unlock; @@ -96,7 +96,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc) list_add_tail(&ent->list, &ceph_fscache_list); } else { kfree(ent); - errorf(fc, "ceph: unable to register fscache cookie for fsid %pU", + errorfc(fc, "unable to register fscache cookie for fsid %pU", fsid); /* all other fs ignore this error */ } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index bfb8aead0555..1d9f083b8a11 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -162,13 +162,13 @@ enum ceph_recover_session_mode { ceph_recover_session_clean }; -static const struct fs_parameter_enum ceph_mount_param_enums[] = { - { Opt_recover_session, "no", ceph_recover_session_no }, - { Opt_recover_session, "clean", ceph_recover_session_clean }, +static const struct constant_table ceph_param_recover[] = { + { "no", ceph_recover_session_no }, + { "clean", ceph_recover_session_clean }, {} }; -static const struct fs_parameter_spec ceph_mount_param_specs[] = { +static const struct fs_parameter_spec ceph_mount_parameters[] = { fsparam_flag_no ("acl", Opt_acl), fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir), fsparam_s32 ("caps_max", Opt_caps_max), @@ -178,8 +178,8 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = { fsparam_flag_no ("copyfrom", Opt_copyfrom), fsparam_flag_no ("dcache", Opt_dcache), fsparam_flag_no ("dirstat", Opt_dirstat), - __fsparam (fs_param_is_string, "fsc", Opt_fscache, - fs_param_neg_with_no | fs_param_v_optional), + fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc + fsparam_string ("fsc", Opt_fscache), // fsc=... fsparam_flag_no ("ino32", Opt_ino32), fsparam_string ("mds_namespace", Opt_mds_namespace), fsparam_flag_no ("poolperm", Opt_poolperm), @@ -188,7 +188,7 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = { fsparam_flag_no ("rbytes", Opt_rbytes), fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes), fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries), - fsparam_enum ("recover_session", Opt_recover_session), + fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover), fsparam_flag_no ("require_active_mds", Opt_require_active_mds), fsparam_u32 ("rsize", Opt_rsize), fsparam_string ("snapdirname", Opt_snapdirname), @@ -197,12 +197,6 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = { {} }; -static const struct fs_parameter_description ceph_mount_parameters = { - .name = "ceph", - .specs = ceph_mount_param_specs, - .enums = ceph_mount_param_enums, -}; - struct ceph_parse_opts_ctx { struct ceph_options *copts; struct ceph_mount_options *opts; @@ -226,7 +220,7 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) dout("%s '%s'\n", __func__, dev_name); if (!dev_name || !*dev_name) - return invalf(fc, "ceph: Empty source"); + return invalfc(fc, "Empty source"); dev_name_end = strchr(dev_name, '/'); if (dev_name_end) { @@ -245,14 +239,14 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc) dev_name_end--; /* back up to ':' separator */ if (dev_name_end < dev_name || *dev_name_end != ':') - return invalf(fc, "ceph: No path or : separator in source"); + return invalfc(fc, "No path or : separator in source"); dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); if (fsopt->server_path) dout("server path '%s'\n", fsopt->server_path); ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name, - pctx->copts, fc); + pctx->copts, fc->log.log); if (ret) return ret; @@ -270,11 +264,11 @@ static int ceph_parse_mount_param(struct fs_context *fc, unsigned int mode; int token, ret; - ret = ceph_parse_param(param, pctx->copts, fc); + ret = ceph_parse_param(param, pctx->copts, fc->log.log); if (ret != -ENOPARAM) return ret; - token = fs_parse(fc, &ceph_mount_parameters, param, &result); + token = fs_parse(fc, ceph_mount_parameters, param, &result); dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); if (token < 0) return token; @@ -301,7 +295,7 @@ static int ceph_parse_mount_param(struct fs_context *fc, break; case Opt_source: if (fc->source) - return invalf(fc, "ceph: Multiple sources specified"); + return invalfc(fc, "Multiple sources specified"); return ceph_parse_source(param, fc); case Opt_wsize: if (result.uint_32 < PAGE_SIZE || @@ -392,7 +386,7 @@ static int ceph_parse_mount_param(struct fs_context *fc, } break; #else - return invalf(fc, "ceph: fscache support is disabled"); + return invalfc(fc, "fscache support is disabled"); #endif case Opt_poolperm: if (!result.negated) @@ -423,7 +417,7 @@ static int ceph_parse_mount_param(struct fs_context *fc, #ifdef CONFIG_CEPH_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; #else - return invalf(fc, "ceph: POSIX ACL support is disabled"); + return invalfc(fc, "POSIX ACL support is disabled"); #endif } else { fc->sb_flags &= ~SB_POSIXACL; @@ -435,7 +429,7 @@ static int ceph_parse_mount_param(struct fs_context *fc, return 0; out_of_range: - return invalf(fc, "ceph: %s out of range", param->key); + return invalfc(fc, "%s out of range", param->key); } static void destroy_mount_options(struct ceph_mount_options *args) @@ -1101,7 +1095,7 @@ static int ceph_get_tree(struct fs_context *fc) dout("ceph_get_tree\n"); if (!fc->source) - return invalf(fc, "ceph: No source"); + return invalfc(fc, "No source"); #ifdef CONFIG_CEPH_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index 0f0dc1c1fe41..153d5c842a9b 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -65,6 +65,11 @@ struct smb3_key_debug_info { __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; } __packed; +struct smb3_notify { + __u32 completion_filter; + bool watch_tree; +} __packed; + #define CIFS_IOCTL_MAGIC 0xCF #define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) #define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4) @@ -72,3 +77,4 @@ struct smb3_key_debug_info { #define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array) #define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info) #define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info) +#define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index fb41e51dd574..440828afcdde 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1084,7 +1084,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, struct cifs_ntsd *pntsd = NULL; int oplock = 0; unsigned int xid; - int rc, create_options = 0; + int rc; struct cifs_tcon *tcon; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct cifs_fid fid; @@ -1096,13 +1096,10 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = READ_CONTROL; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -1147,7 +1144,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, { int oplock = 0; unsigned int xid; - int rc, access_flags, create_options = 0; + int rc, access_flags; struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -1160,9 +1157,6 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) access_flags = WRITE_OWNER; else @@ -1171,7 +1165,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = access_flags; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5492b9860baa..febab27cd838 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -275,7 +275,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = 0; /* unlimited */ if (server->ops->queryfs) - rc = server->ops->queryfs(xid, tcon, buf); + rc = server->ops->queryfs(xid, tcon, cifs_sb, buf); free_xid(xid); return 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 239338d57086..de82cfa44b1a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -298,7 +298,8 @@ struct smb_version_operations { const char *, struct dfs_info3_param **, unsigned int *, const struct nls_table *, int); /* informational QFS call */ - void (*qfs_tcon)(const unsigned int, struct cifs_tcon *); + void (*qfs_tcon)(const unsigned int, struct cifs_tcon *, + struct cifs_sb_info *); /* check if a path is accessible or not */ int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *); @@ -409,7 +410,7 @@ struct smb_version_operations { struct cifsInodeInfo *); /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, - struct kstatfs *); + struct cifs_sb_info *, struct kstatfs *); /* send mandatory brlock to the server */ int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, __u64, __u32, int, int, bool); @@ -430,6 +431,8 @@ struct smb_version_operations { struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *src_file, void __user *); + int (*notify)(const unsigned int xid, struct file *pfile, + void __user *pbuf); int (*query_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); @@ -490,6 +493,7 @@ struct smb_version_operations { /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p); /* make unix special files (block, char, fifo, socket) */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 948bf3474db1..89eaaf46d1ca 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -612,4 +612,12 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, } #endif +static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) +{ + if (cifs_sb && (backup_cred(cifs_sb))) + return options | CREATE_OPEN_BACKUP_INTENT; + else + return options; +} + #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a481296f417f..3c89569e7210 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -260,7 +260,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) if (server->tcpStatus != CifsNeedReconnect) break; - if (--retries) + if (retries && --retries) continue; /* diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0aa3623ae0e1..a941ac7a659d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4365,7 +4365,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, /* do not care if a following call succeed - informational */ if (!tcon->pipe && server->ops->qfs_tcon) { - server->ops->qfs_tcon(*xid, tcon); + server->ops->qfs_tcon(*xid, tcon, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & cpu_to_le32(FILE_READ_ONLY_DEVICE)) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f3b79012ff29..0ef099442f20 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -355,13 +355,10 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a4e8f7d445ac..bc9516ab4b34 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -222,9 +222,6 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (!buf) return -ENOMEM; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -235,7 +232,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; @@ -752,9 +749,6 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) desired_access = cifs_convert_flags(cfile->f_flags); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -768,7 +762,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = &cfile->fid; @@ -2599,8 +2593,10 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); rc = file_write_and_wait_range(file, start, end); - if (rc) + if (rc) { + trace_cifs_fsync_err(inode->i_ino, rc); return rc; + } xid = get_xid(); @@ -2638,8 +2634,10 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); rc = file_write_and_wait_range(file, start, end); - if (rc) + if (rc) { + trace_cifs_fsync_err(file_inode(file)->i_ino, rc); return rc; + } xid = get_xid(); @@ -2672,7 +2670,8 @@ int cifs_flush(struct file *file, fl_owner_t id) rc = filemap_write_and_wait(inode->i_mapping); cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); - + if (rc) + trace_cifs_flush_err(inode->i_ino, rc); return rc; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 676e96a7a9f0..9ba623b601ec 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -475,9 +475,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -1285,7 +1283,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = DELETE | FILE_WRITE_ATTRIBUTES; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -1823,7 +1821,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, oparms.cifs_sb = cifs_sb; /* open the file to be renamed -- we need DELETE perms */ oparms.desired_access = DELETE; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = from_path; oparms.fid = &fid; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 1a01e108d75e..4a73e63c4d43 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -65,7 +65,7 @@ static long cifs_ioctl_query_info(unsigned int xid, struct file *filep, if (tcon->ses->server->ops->ioctl_query_info) rc = tcon->ses->server->ops->ioctl_query_info( - xid, tcon, utf16_path, + xid, tcon, cifs_sb, utf16_path, filep->private_data ? 0 : 1, p); else rc = -EOPNOTSUPP; @@ -169,6 +169,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) unsigned int xid; struct cifsFileInfo *pSMBFile = filep->private_data; struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; __u64 ExtAttrBits = 0; __u64 caps; @@ -299,6 +300,21 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) else rc = 0; break; + case CIFS_IOC_NOTIFY: + if (!S_ISDIR(inode->i_mode)) { + /* Notify can only be done on directories */ + rc = -EOPNOTSUPP; + break; + } + cifs_sb = CIFS_SB(inode->i_sb); + tcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); + if (tcon && tcon->ses->server->ops->notify) { + rc = tcon->ses->server->ops->notify(xid, + filep, (void __user *)arg); + cifs_dbg(FYI, "ioctl notify rc %d\n", rc); + } else + rc = -EOPNOTSUPP; + break; default: cifs_dbg(FYI, "unsupported ioctl\n"); break; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index b736acd3917b..852aa00ec729 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -315,7 +315,7 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -353,15 +353,11 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifs_io_parms io_parms; - int create_options = CREATE_NOT_DIR; - - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_CREATE; oparms.path = path; oparms.fid = &fid; @@ -402,9 +398,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.fid = &fid; oparms.reconnect = false; @@ -457,14 +451,10 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifs_io_parms io_parms; - int create_options = CREATE_NOT_DIR; __le16 *utf16_path; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct kvec iov[2]; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - cifs_dbg(FYI, "%s: path: %s\n", __func__, path); utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); @@ -474,7 +464,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d17587c2c4ab..ba9dadf3be24 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -196,7 +196,8 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * may look wrong since the inodes may not have timed out by the time * "ls" does a stat() call on them. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f0795c856d8f..43a88e26d26b 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -101,7 +101,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses) iface_count = ses->iface_count; if (iface_count <= 0) { spin_unlock(&ses->iface_lock); - cifs_dbg(FYI, "no iface list available to open channels\n"); + cifs_dbg(VFS, "no iface list available to open channels\n"); return 0; } ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces), diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index d70a2bb062df..eb994e313c6a 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -504,7 +504,8 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) } static void -cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { CIFSSMBQFSDeviceInfo(xid, tcon); CIFSSMBQFSAttributeInfo(xid, tcon); @@ -565,7 +566,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -793,7 +794,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = SYNCHRONIZE | FILE_WRITE_ATTRIBUTES; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -872,7 +873,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, static int cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc = -EOPNOTSUPP; @@ -970,7 +971,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.create_options = OPEN_REPARSE_POINT; + oparms.create_options = cifs_create_options(cifs_sb, + OPEN_REPARSE_POINT); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -1029,7 +1031,6 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; FILE_ALL_INFO *buf = NULL; struct cifs_io_parms io_parms; __u32 oplock = 0; @@ -1090,13 +1091,11 @@ cifs_make_node(unsigned int xid, struct inode *inode, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL); oparms.disposition = FILE_CREATE; oparms.path = full_path; oparms.fid = &fid; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 5ef5e97a6d13..1cf207564ff9 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -99,9 +99,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = desired_access; oparms.disposition = create_disposition; - oparms.create_options = create_options; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; oparms.mode = mode; @@ -457,7 +455,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, /* If it is a root and its handle is cached then use it */ if (!strlen(full_path) && !no_cached_open) { - rc = open_shroot(xid, tcon, &fid); + rc = open_shroot(xid, tcon, cifs_sb, &fid); if (rc) goto out; @@ -474,9 +472,6 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 6787fce26f20..baa825f4cec0 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -655,7 +655,8 @@ smb2_cached_lease_break(struct work_struct *work) /* * Open the directory at the root of a share */ -int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) +int open_shroot(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid) { struct cifs_ses *ses = tcon->ses; struct TCP_Server_Info *server = ses->server; @@ -702,7 +703,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE; oparms.tcon = tcon; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; oparms.fid = pfid; @@ -818,7 +819,8 @@ oshr_free: } static void -smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -830,7 +832,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -838,7 +840,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, NULL); else - rc = open_shroot(xid, tcon, &fid); + rc = open_shroot(xid, tcon, cifs_sb, &fid); if (rc) return; @@ -860,7 +862,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) } static void -smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -871,7 +874,7 @@ smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -906,10 +909,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -1151,10 +1151,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_WRITE_EA; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -1422,6 +1419,7 @@ req_res_key_exit: static int smb2_ioctl_query_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p) { @@ -1447,6 +1445,7 @@ smb2_ioctl_query_info(const unsigned int xid, struct kvec close_iov[1]; unsigned int size[2]; void *data[2]; + int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR; memset(rqst, 0, sizeof(rqst)); resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; @@ -1477,10 +1476,7 @@ smb2_ioctl_query_info(const unsigned int xid, memset(&oparms, 0, sizeof(oparms)); oparms.tcon = tcon; oparms.disposition = FILE_OPEN; - if (is_dir) - oparms.create_options = CREATE_NOT_FILE; - else - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; @@ -2049,6 +2045,66 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, return rc; } + + +static int +smb3_notify(const unsigned int xid, struct file *pfile, + void __user *ioc_buf) +{ + struct smb3_notify notify; + struct dentry *dentry = pfile->f_path.dentry; + struct inode *inode = file_inode(pfile); + struct cifs_sb_info *cifs_sb; + struct cifs_open_parms oparms; + struct cifs_fid fid; + struct cifs_tcon *tcon; + unsigned char *path = NULL; + __le16 *utf16_path = NULL; + u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + int rc = 0; + + path = build_path_from_dentry(dentry); + if (path == NULL) + return -ENOMEM; + + cifs_sb = CIFS_SB(inode->i_sb); + + utf16_path = cifs_convert_path_to_utf16(path + 1, cifs_sb); + if (utf16_path == NULL) { + rc = -ENOMEM; + goto notify_exit; + } + + if (copy_from_user(¬ify, ioc_buf, sizeof(struct smb3_notify))) { + rc = -EFAULT; + goto notify_exit; + } + + tcon = cifs_sb_master_tcon(cifs_sb); + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = cifs_create_options(cifs_sb, 0); + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL); + if (rc) + goto notify_exit; + + rc = SMB2_change_notify(xid, tcon, fid.persistent_fid, fid.volatile_fid, + notify.watch_tree, notify.completion_filter); + + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + + cifs_dbg(FYI, "change notify for path %s rc %d\n", path, rc); + +notify_exit: + kfree(path); + kfree(utf16_path); + return rc; +} + static int smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, const char *path, struct cifs_sb_info *cifs_sb, @@ -2086,10 +2142,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = fid; oparms.reconnect = false; @@ -2343,10 +2396,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = desired_access; oparms.disposition = FILE_OPEN; - if (cifs_sb && backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2402,7 +2452,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, static int smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { struct smb2_query_info_rsp *rsp; struct smb2_fs_full_size_info *info = NULL; @@ -2417,7 +2467,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, FS_FULL_SIZE_INFORMATION, SMB2_O_INFO_FILESYSTEM, sizeof(struct smb2_fs_full_size_info), - &rsp_iov, &buftype, NULL); + &rsp_iov, &buftype, cifs_sb); if (rc) goto qfs_exit; @@ -2439,7 +2489,7 @@ qfs_exit: static int smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -2448,12 +2498,12 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; if (!tcon->posix_extensions) - return smb2_queryfs(xid, tcon, buf); + return smb2_queryfs(xid, tcon, cifs_sb, buf); oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2722,6 +2772,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; struct reparse_data_buffer *reparse_buf; + int create_options = is_reparse_point ? OPEN_REPARSE_POINT : 0; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); @@ -2748,14 +2799,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - if (is_reparse_point) - oparms.create_options = OPEN_REPARSE_POINT; - + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; @@ -2934,11 +2978,6 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; @@ -2949,6 +2988,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, oparms.tcon = tcon; oparms.desired_access = READ_CONTROL; oparms.disposition = FILE_OPEN; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2990,11 +3030,6 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) access_flags = WRITE_OWNER; else @@ -3009,6 +3044,7 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, oparms.tcon = tcon; oparms.desired_access = access_flags; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -4491,7 +4527,6 @@ smb2_make_node(unsigned int xid, struct inode *inode, { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); int rc = -EPERM; - int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; FILE_ALL_INFO *buf = NULL; struct cifs_io_parms io_parms; __u32 oplock = 0; @@ -4527,13 +4562,11 @@ smb2_make_node(unsigned int xid, struct inode *inode, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL); oparms.disposition = FILE_CREATE; oparms.path = full_path; oparms.fid = &fid; @@ -4868,6 +4901,7 @@ struct smb_version_operations smb30_operations = { .dir_needs_close = smb2_dir_needs_close, .fallocate = smb3_fallocate, .enum_snapshots = smb3_enum_snapshots, + .notify = smb3_notify, .init_transform_rq = smb3_init_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, @@ -4978,6 +5012,7 @@ struct smb_version_operations smb311_operations = { .dir_needs_close = smb2_dir_needs_close, .fallocate = smb3_fallocate, .enum_snapshots = smb3_enum_snapshots, + .notify = smb3_notify, .init_transform_rq = smb3_init_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 14f209f7376f..1234f9ccab03 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -350,9 +350,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } rc = cifs_negotiate_protocol(0, tcon->ses); - if (!rc && tcon->ses->need_reconnect) + if (!rc && tcon->ses->need_reconnect) { rc = cifs_setup_session(0, tcon->ses, nls_codepage); - + if ((rc == -EACCES) && !tcon->retry) { + rc = -EHOSTDOWN; + mutex_unlock(&tcon->ses->session_mutex); + goto failed; + } + } if (rc || !tcon->need_reconnect) { mutex_unlock(&tcon->ses->session_mutex); goto out; @@ -397,6 +402,7 @@ out: case SMB2_SET_INFO: rc = -EAGAIN; } +failed: unload_nls(nls_codepage); return rc; } @@ -1933,6 +1939,16 @@ parse_query_id_ctxt(struct create_context *cc, struct smb2_file_all_info *buf) buf->IndexNumber = pdisk_id->DiskFileId; } +static void +parse_posix_ctxt(struct create_context *cc, struct smb_posix_info *pposix_inf) +{ + /* struct smb_posix_info *ppinf = (struct smb_posix_info *)cc; */ + + /* TODO: Need to add parsing for the context and return */ + printk_once(KERN_WARNING + "SMB3 3.11 POSIX response context not completed yet\n"); +} + void smb2_parse_contexts(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, @@ -1944,6 +1960,9 @@ smb2_parse_contexts(struct TCP_Server_Info *server, unsigned int next; unsigned int remaining; char *name; + const char smb3_create_tag_posix[] = {0x93, 0xAD, 0x25, 0x50, 0x9C, + 0xB4, 0x11, 0xE7, 0xB4, 0x23, 0x83, + 0xDE, 0x96, 0x8B, 0xCD, 0x7C}; *oplock = 0; data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); @@ -1963,6 +1982,15 @@ smb2_parse_contexts(struct TCP_Server_Info *server, else if (buf && (le16_to_cpu(cc->NameLength) == 4) && strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) parse_query_id_ctxt(cc, buf); + else if ((le16_to_cpu(cc->NameLength) == 16)) { + if (memcmp(name, smb3_create_tag_posix, 16) == 0) + parse_posix_ctxt(cc, NULL); + } + /* else { + cifs_dbg(FYI, "Context not matched with len %d\n", + le16_to_cpu(cc->NameLength)); + cifs_dump_mem("Cctxt name: ", name, 4); + } */ next = le32_to_cpu(cc->Next); if (!next) @@ -3357,6 +3385,7 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst, req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; + /* See note 354 of MS-SMB2, 64K max */ req->OutputBufferLength = cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE); req->CompletionFilter = cpu_to_le32(completion_filter); @@ -4023,6 +4052,9 @@ smb2_writev_callback(struct mid_q_entry *mid) wdata->cfile->fid.persistent_fid, tcon->tid, tcon->ses->Suid, wdata->offset, wdata->bytes, wdata->result); + if (wdata->result == -ENOSPC) + printk_once(KERN_WARNING "Out of space writing to %s\n", + tcon->treeName); } else trace_smb3_write_done(0 /* no xid */, wdata->cfile->fid.persistent_fid, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4c43dbd1e089..fa03df130f1a 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1519,6 +1519,7 @@ struct smb3_fs_vol_info { #define FILE_NORMALIZED_NAME_INFORMATION 48 #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 #define FILE_STANDARD_LINK_INFORMATION 54 +#define FILE_ID_INFORMATION 59 struct smb2_file_internal_info { __le64 IndexNumber; @@ -1593,6 +1594,21 @@ struct smb2_file_network_open_info { __le32 Reserved; } __packed; /* level 34 Query also similar returned in close rsp and open rsp */ +/* See MS-FSCC 2.4.43 */ +struct smb2_file_id_information { + __le64 VolumeSerialNumber; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ +} __packed; /* level 59 */ + extern char smb2_padding[7]; +/* equivalent of the contents of SMB3.1.1 POSIX open context response */ +struct smb_posix_info { + __le32 nlink; + __le32 reparse_tag; + __le32 mode; + kuid_t uid; + kuid_t gid; +}; #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 6c678e00046f..de6388ef344f 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -68,7 +68,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon, - struct cifs_fid *pfid); + struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid); extern void close_shroot(struct cached_fid *cfid); extern void close_shroot_lease(struct cached_fid *cfid); extern void close_shroot_lease_locked(struct cached_fid *cfid); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index fe6acfce3390..08b703b7a15e 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -104,13 +104,14 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { struct cifs_chan *chan; struct cifs_ses *ses = NULL; + struct TCP_Server_Info *it = NULL; int i; int rc = 0; spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) { if (ses->Suid == ses_id) goto found; } diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index e7e350b13d6a..4cb0d5f7ce45 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -547,6 +547,33 @@ DEFINE_EVENT(smb3_exit_err_class, smb3_##name, \ DEFINE_SMB3_EXIT_ERR_EVENT(exit_err); + +DECLARE_EVENT_CLASS(smb3_sync_err_class, + TP_PROTO(unsigned long ino, + int rc), + TP_ARGS(ino, rc), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(int, rc) + ), + TP_fast_assign( + __entry->ino = ino; + __entry->rc = rc; + ), + TP_printk("\tino=%lu rc=%d", + __entry->ino, __entry->rc) +) + +#define DEFINE_SMB3_SYNC_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_sync_err_class, cifs_##name, \ + TP_PROTO(unsigned long ino, \ + int rc), \ + TP_ARGS(ino, rc)) + +DEFINE_SMB3_SYNC_ERR_EVENT(fsync_err); +DEFINE_SMB3_SYNC_ERR_EVENT(flush_err); + + DECLARE_EVENT_CLASS(smb3_enter_exit_class, TP_PROTO(unsigned int xid, const char *func_name), diff --git a/fs/coredump.c b/fs/coredump.c index b1ea7dfbd149..f8296a82d01d 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -517,7 +517,7 @@ static void wait_for_dump_helpers(struct file *file) pipe_lock(pipe); pipe->readers++; pipe->writers--; - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); pipe_unlock(pipe); @@ -525,7 +525,7 @@ static void wait_for_dump_helpers(struct file *file) * We actually want wait_event_freezable() but then we need * to clear TIF_SIGPENDING and improve dump_interrupted(). */ - wait_event_interruptible(pipe->wait, pipe->readers == 1); + wait_event_interruptible(pipe->rd_wait, pipe->readers == 1); pipe_lock(pipe); pipe->readers--; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 2f04024c3588..912308600d39 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -534,7 +534,7 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc, /* check for wrong endianness */ if (super->magic == CRAMFS_MAGIC_WEND) { if (!silent) - errorf(fc, "cramfs: wrong endianness"); + errorfc(fc, "wrong endianness"); return -EINVAL; } @@ -546,22 +546,22 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc, mutex_unlock(&read_mutex); if (super->magic != CRAMFS_MAGIC) { if (super->magic == CRAMFS_MAGIC_WEND && !silent) - errorf(fc, "cramfs: wrong endianness"); + errorfc(fc, "wrong endianness"); else if (!silent) - errorf(fc, "cramfs: wrong magic"); + errorfc(fc, "wrong magic"); return -EINVAL; } } /* get feature flags first */ if (super->flags & ~CRAMFS_SUPPORTED_FLAGS) { - errorf(fc, "cramfs: unsupported filesystem features"); + errorfc(fc, "unsupported filesystem features"); return -EINVAL; } /* Check that the root inode is in a sane state */ if (!S_ISDIR(super->root.mode)) { - errorf(fc, "cramfs: root is not a directory"); + errorfc(fc, "root is not a directory"); return -EINVAL; } /* correct strange, hard-coded permissions of mkcramfs */ @@ -580,12 +580,12 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc, sbi->magic = super->magic; sbi->flags = super->flags; if (root_offset == 0) - infof(fc, "cramfs: empty filesystem"); + infofc(fc, "empty filesystem"); else if (!(super->flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) && ((root_offset != sizeof(struct cramfs_super)) && (root_offset != 512 + sizeof(struct cramfs_super)))) { - errorf(fc, "cramfs: bad root offset %lu", root_offset); + errorfc(fc, "bad root offset %lu", root_offset); return -EINVAL; } @@ -937,12 +937,11 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, * on persistent storage prior to completion of the operation. */ int dax_writeback_mapping_range(struct address_space *mapping, - struct block_device *bdev, struct writeback_control *wbc) + struct dax_device *dax_dev, struct writeback_control *wbc) { XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT); struct inode *inode = mapping->host; pgoff_t end_index = wbc->range_end >> PAGE_SHIFT; - struct dax_device *dax_dev; void *entry; int ret = 0; unsigned int scanned = 0; @@ -953,10 +952,6 @@ int dax_writeback_mapping_range(struct address_space *mapping, if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL) return 0; - dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); - if (!dax_dev) - return -EIO; - trace_dax_writeback_range(inode, xas.xa_index, end_index); tag_pages_for_writeback(mapping, xas.xa_index, end_index); @@ -977,7 +972,6 @@ int dax_writeback_mapping_range(struct address_space *mapping, xas_lock_irq(&xas); } xas_unlock_irq(&xas); - put_dax(dax_dev); trace_dax_writeback_range_done(inode, xas.xa_index, end_index); return ret; } @@ -1207,6 +1201,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, lockdep_assert_held(&inode->i_rwsem); } + if (iocb->ki_flags & IOCB_NOWAIT) + flags |= IOMAP_NOWAIT; + while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index cffa0c1ec829..019572c6b39a 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -524,16 +524,12 @@ out: static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) { - int rc = 0; - struct inode *inode; - struct inode *lower_inode; - - inode = (struct inode *)mapping->host; - lower_inode = ecryptfs_inode_to_lower(inode); - if (lower_inode->i_mapping->a_ops->bmap) - rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping, - block); - return rc; + struct inode *lower_inode = ecryptfs_inode_to_lower(mapping->host); + int ret = bmap(lower_inode, &block); + + if (ret) + return 0; + return block; } const struct address_space_operations ecryptfs_aops = { diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 119667e65890..c885cf7d724b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -960,8 +960,9 @@ ext2_writepages(struct address_space *mapping, struct writeback_control *wbc) static int ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc) { - return dax_writeback_mapping_range(mapping, - mapping->host->i_sb->s_bdev, wbc); + struct ext2_sb_info *sbi = EXT2_SB(mapping->host->i_sb); + + return dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); } const struct address_space_operations ext2_aops = { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3313168b680f..1305b810c44a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2867,7 +2867,7 @@ static int ext4_dax_writepages(struct address_space *mapping, percpu_down_read(&sbi->s_journal_flag_rwsem); trace_ext4_writepages(inode, wbc); - ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc); + ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); percpu_up_read(&sbi->s_journal_flag_rwsem); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8bd9afa81c54..b27b72107911 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3666,12 +3666,16 @@ static int check_swap_activate(struct swap_info_struct *sis, page_no < sis->max) { unsigned block_in_page; sector_t first_block; + sector_t block = 0; + int err = 0; cond_resched(); - first_block = bmap(inode, probe_block); - if (first_block == 0) + block = probe_block; + err = bmap(inode, &block); + if (err || !block) goto bad_bmap; + first_block = block; /* * It must be PAGE_SIZE aligned on-disk @@ -3683,11 +3687,13 @@ static int check_swap_activate(struct swap_info_struct *sis, for (block_in_page = 1; block_in_page < blocks_per_page; block_in_page++) { - sector_t block; - block = bmap(inode, probe_block + block_in_page); - if (block == 0) + block = probe_block + block_in_page; + err = bmap(inode, &block); + + if (err || !block) goto bad_bmap; + if (block != first_block + block_in_page) { /* Discontiguity */ probe_block++; diff --git a/fs/filesystems.c b/fs/filesystems.c index 9135646e41ac..77bf5f95362d 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -74,7 +74,8 @@ int register_filesystem(struct file_system_type * fs) int res = 0; struct file_system_type ** p; - if (fs->parameters && !fs_validate_description(fs->parameters)) + if (fs->parameters && + !fs_validate_description(fs->name, fs->parameters)) return -EINVAL; BUG_ON(strchr(fs->name, '.')); diff --git a/fs/fs_context.c b/fs/fs_context.c index 138b5b4d621d..fc9f6ef93b55 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -45,6 +45,7 @@ static const struct constant_table common_set_sb_flag[] = { { "posixacl", SB_POSIXACL }, { "ro", SB_RDONLY }, { "sync", SB_SYNCHRONOUS }, + { }, }; static const struct constant_table common_clear_sb_flag[] = { @@ -53,6 +54,7 @@ static const struct constant_table common_clear_sb_flag[] = { { "nomand", SB_MANDLOCK }, { "rw", SB_RDONLY }, { "silent", SB_SILENT }, + { }, }; static const char *const forbidden_sb_flag[] = { @@ -175,14 +177,15 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key, struct fs_parameter param = { .key = key, - .type = fs_value_is_string, + .type = fs_value_is_flag, .size = v_size, }; - if (v_size > 0) { + if (value) { param.string = kmemdup_nul(value, v_size, GFP_KERNEL); if (!param.string) return -ENOMEM; + param.type = fs_value_is_string; } ret = vfs_parse_fs_param(fc, ¶m); @@ -268,6 +271,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, fc->fs_type = get_filesystem(fs_type); fc->cred = get_current_cred(); fc->net_ns = get_net(current->nsproxy->net_ns); + fc->log.prefix = fs_type->name; mutex_init(&fc->uapi_mutex); @@ -361,8 +365,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc) get_net(fc->net_ns); get_user_ns(fc->user_ns); get_cred(fc->cred); - if (fc->log) - refcount_inc(&fc->log->usage); + if (fc->log.log) + refcount_inc(&fc->log.log->usage); /* Can't call put until we've called ->dup */ ret = fc->ops->dup(fc, src_fc); @@ -385,64 +389,33 @@ EXPORT_SYMBOL(vfs_dup_fs_context); * @fc: The filesystem context to log to. * @fmt: The format of the buffer. */ -void logfc(struct fs_context *fc, const char *fmt, ...) +void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...) { - static const char store_failure[] = "OOM: Can't store error string"; - struct fc_log *log = fc ? fc->log : NULL; - const char *p; va_list va; - char *q; - u8 freeable; + struct va_format vaf = {.fmt = fmt, .va = &va}; va_start(va, fmt); - if (!strchr(fmt, '%')) { - p = fmt; - goto unformatted_string; - } - if (strcmp(fmt, "%s") == 0) { - p = va_arg(va, const char *); - goto unformatted_string; - } - - q = kvasprintf(GFP_KERNEL, fmt, va); -copied_string: - if (!q) - goto store_failure; - freeable = 1; - goto store_string; - -unformatted_string: - if ((unsigned long)p >= (unsigned long)__start_rodata && - (unsigned long)p < (unsigned long)__end_rodata) - goto const_string; - if (log && within_module_core((unsigned long)p, log->owner)) - goto const_string; - q = kstrdup(p, GFP_KERNEL); - goto copied_string; - -store_failure: - p = store_failure; -const_string: - q = (char *)p; - freeable = 0; -store_string: if (!log) { - switch (fmt[0]) { + switch (level) { case 'w': - printk(KERN_WARNING "%s\n", q + 2); + printk(KERN_WARNING "%s%s%pV\n", prefix ? prefix : "", + prefix ? ": " : "", &vaf); break; case 'e': - printk(KERN_ERR "%s\n", q + 2); + printk(KERN_ERR "%s%s%pV\n", prefix ? prefix : "", + prefix ? ": " : "", &vaf); break; default: - printk(KERN_NOTICE "%s\n", q + 2); + printk(KERN_NOTICE "%s%s%pV\n", prefix ? prefix : "", + prefix ? ": " : "", &vaf); break; } - if (freeable) - kfree(q); } else { unsigned int logsize = ARRAY_SIZE(log->buffer); u8 index; + char *q = kasprintf(GFP_KERNEL, "%c %s%s%pV\n", level, + prefix ? prefix : "", + prefix ? ": " : "", &vaf); index = log->head & (logsize - 1); BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) || @@ -454,9 +427,11 @@ store_string: log->tail++; } - log->buffer[index] = q; - log->need_free &= ~(1 << index); - log->need_free |= freeable << index; + log->buffer[index] = q ? q : "OOM: Can't store error string"; + if (q) + log->need_free |= 1 << index; + else + log->need_free &= ~(1 << index); log->head++; } va_end(va); @@ -468,12 +443,12 @@ EXPORT_SYMBOL(logfc); */ static void put_fc_log(struct fs_context *fc) { - struct fc_log *log = fc->log; + struct fc_log *log = fc->log.log; int i; if (log) { if (refcount_dec_and_test(&log->usage)) { - fc->log = NULL; + fc->log.log = NULL; for (i = 0; i <= 7; i++) if (log->need_free & (1 << i)) kfree(log->buffer[i]); diff --git a/fs/fs_parser.c b/fs/fs_parser.c index d1930adce68d..7e6fb43f9541 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -20,42 +20,66 @@ static const struct constant_table bool_names[] = { { "no", false }, { "true", true }, { "yes", true }, + { }, }; +static const struct constant_table * +__lookup_constant(const struct constant_table *tbl, const char *name) +{ + for ( ; tbl->name; tbl++) + if (strcmp(name, tbl->name) == 0) + return tbl; + return NULL; +} + /** * lookup_constant - Look up a constant by name in an ordered table * @tbl: The table of constants to search. - * @tbl_size: The size of the table. * @name: The name to look up. * @not_found: The value to return if the name is not found. */ -int __lookup_constant(const struct constant_table *tbl, size_t tbl_size, - const char *name, int not_found) +int lookup_constant(const struct constant_table *tbl, const char *name, int not_found) { - unsigned int i; + const struct constant_table *p = __lookup_constant(tbl, name); - for (i = 0; i < tbl_size; i++) - if (strcmp(name, tbl[i].name) == 0) - return tbl[i].value; + return p ? p->value : not_found; +} +EXPORT_SYMBOL(lookup_constant); - return not_found; +static inline bool is_flag(const struct fs_parameter_spec *p) +{ + return p->type == NULL; } -EXPORT_SYMBOL(__lookup_constant); static const struct fs_parameter_spec *fs_lookup_key( - const struct fs_parameter_description *desc, - const char *name) + const struct fs_parameter_spec *desc, + struct fs_parameter *param, bool *negated) { - const struct fs_parameter_spec *p; - - if (!desc->specs) - return NULL; - - for (p = desc->specs; p->name; p++) - if (strcmp(p->name, name) == 0) + const struct fs_parameter_spec *p, *other = NULL; + const char *name = param->key; + bool want_flag = param->type == fs_value_is_flag; + + *negated = false; + for (p = desc; p->name; p++) { + if (strcmp(p->name, name) != 0) + continue; + if (likely(is_flag(p) == want_flag)) return p; - - return NULL; + other = p; + } + if (want_flag) { + if (name[0] == 'n' && name[1] == 'o' && name[2]) { + for (p = desc; p->name; p++) { + if (strcmp(p->name, name + 2) != 0) + continue; + if (!(p->flags & fs_param_neg_with_no)) + continue; + *negated = true; + return p; + } + } + } + return other; } /* @@ -76,172 +100,38 @@ static const struct fs_parameter_spec *fs_lookup_key( * unknown parameters are okay and -EINVAL if there was a conversion issue or * the parameter wasn't recognised and unknowns aren't okay. */ -int fs_parse(struct fs_context *fc, - const struct fs_parameter_description *desc, +int __fs_parse(struct p_log *log, + const struct fs_parameter_spec *desc, struct fs_parameter *param, struct fs_parse_result *result) { const struct fs_parameter_spec *p; - const struct fs_parameter_enum *e; - int ret = -ENOPARAM, b; - result->has_value = !!param->string; - result->negated = false; result->uint_64 = 0; - p = fs_lookup_key(desc, param->key); - if (!p) { - /* If we didn't find something that looks like "noxxx", see if - * "xxx" takes the "no"-form negative - but only if there - * wasn't an value. - */ - if (result->has_value) - goto unknown_parameter; - if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2]) - goto unknown_parameter; - - p = fs_lookup_key(desc, param->key + 2); - if (!p) - goto unknown_parameter; - if (!(p->flags & fs_param_neg_with_no)) - goto unknown_parameter; - result->boolean = false; - result->negated = true; - } + p = fs_lookup_key(desc, param, &result->negated); + if (!p) + return -ENOPARAM; if (p->flags & fs_param_deprecated) - warnf(fc, "%s: Deprecated parameter '%s'", - desc->name, param->key); - - if (result->negated) - goto okay; - - /* Certain parameter types only take a string and convert it. */ - switch (p->type) { - case __fs_param_wasnt_defined: - return -EINVAL; - case fs_param_is_u32: - case fs_param_is_u32_octal: - case fs_param_is_u32_hex: - case fs_param_is_s32: - case fs_param_is_u64: - case fs_param_is_enum: - case fs_param_is_string: - if (param->type != fs_value_is_string) - goto bad_value; - if (!result->has_value) { - if (p->flags & fs_param_v_optional) - goto okay; - goto bad_value; - } - /* Fall through */ - default: - break; - } + warn_plog(log, "Deprecated parameter '%s'", param->key); /* Try to turn the type we were given into the type desired by the * parameter and give an error if we can't. */ - switch (p->type) { - case fs_param_is_flag: - if (param->type != fs_value_is_flag && - (param->type != fs_value_is_string || result->has_value)) - return invalf(fc, "%s: Unexpected value for '%s'", - desc->name, param->key); - result->boolean = true; - goto okay; - - case fs_param_is_bool: - switch (param->type) { - case fs_value_is_flag: - result->boolean = true; - goto okay; - case fs_value_is_string: - if (param->size == 0) { - result->boolean = true; - goto okay; - } - b = lookup_constant(bool_names, param->string, -1); - if (b == -1) - goto bad_value; - result->boolean = b; - goto okay; - default: - goto bad_value; - } - - case fs_param_is_u32: - ret = kstrtouint(param->string, 0, &result->uint_32); - goto maybe_okay; - case fs_param_is_u32_octal: - ret = kstrtouint(param->string, 8, &result->uint_32); - goto maybe_okay; - case fs_param_is_u32_hex: - ret = kstrtouint(param->string, 16, &result->uint_32); - goto maybe_okay; - case fs_param_is_s32: - ret = kstrtoint(param->string, 0, &result->int_32); - goto maybe_okay; - case fs_param_is_u64: - ret = kstrtoull(param->string, 0, &result->uint_64); - goto maybe_okay; - - case fs_param_is_enum: - for (e = desc->enums; e->name[0]; e++) { - if (e->opt == p->opt && - strcmp(e->name, param->string) == 0) { - result->uint_32 = e->value; - goto okay; - } - } - goto bad_value; - - case fs_param_is_string: - goto okay; - case fs_param_is_blob: - if (param->type != fs_value_is_blob) - goto bad_value; - goto okay; - - case fs_param_is_fd: { - switch (param->type) { - case fs_value_is_string: - if (!result->has_value) - goto bad_value; - - ret = kstrtouint(param->string, 0, &result->uint_32); - break; - case fs_value_is_file: - result->uint_32 = param->dirfd; - ret = 0; - default: - goto bad_value; - } - - if (result->uint_32 > INT_MAX) - goto bad_value; - goto maybe_okay; - } - - case fs_param_is_blockdev: - case fs_param_is_path: - goto okay; - default: - BUG(); + if (is_flag(p)) { + if (param->type != fs_value_is_flag) + return inval_plog(log, "Unexpected value for '%s'", + param->key); + result->boolean = !result->negated; + } else { + int ret = p->type(log, p, param, result); + if (ret) + return ret; } - -maybe_okay: - if (ret < 0) - goto bad_value; -okay: return p->opt; - -bad_value: - return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key); -unknown_parameter: - return -ENOPARAM; } -EXPORT_SYMBOL(fs_parse); +EXPORT_SYMBOL(__fs_parse); /** * fs_lookup_param - Look up a path referred to by a parameter @@ -267,9 +157,6 @@ int fs_lookup_param(struct fs_context *fc, return PTR_ERR(f); put_f = true; break; - case fs_value_is_filename_empty: - flags = LOOKUP_EMPTY; - /* Fall through */ case fs_value_is_filename: f = param->name; put_f = false; @@ -302,6 +189,124 @@ out: } EXPORT_SYMBOL(fs_lookup_param); +int fs_param_bad_value(struct p_log *log, struct fs_parameter *param) +{ + return inval_plog(log, "Bad value for '%s'", param->key); +} + +int fs_param_is_bool(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + int b; + if (param->type != fs_value_is_string) + return fs_param_bad_value(log, param); + b = lookup_constant(bool_names, param->string, -1); + if (b == -1) + return fs_param_bad_value(log, param); + result->boolean = b; + return 0; +} +EXPORT_SYMBOL(fs_param_is_bool); + +int fs_param_is_u32(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + int base = (unsigned long)p->data; + if (param->type != fs_value_is_string || + kstrtouint(param->string, base, &result->uint_32) < 0) + return fs_param_bad_value(log, param); + return 0; +} +EXPORT_SYMBOL(fs_param_is_u32); + +int fs_param_is_s32(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + if (param->type != fs_value_is_string || + kstrtoint(param->string, 0, &result->int_32) < 0) + return fs_param_bad_value(log, param); + return 0; +} +EXPORT_SYMBOL(fs_param_is_s32); + +int fs_param_is_u64(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + if (param->type != fs_value_is_string || + kstrtoull(param->string, 0, &result->uint_64) < 0) + return fs_param_bad_value(log, param); + return 0; +} +EXPORT_SYMBOL(fs_param_is_u64); + +int fs_param_is_enum(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + const struct constant_table *c; + if (param->type != fs_value_is_string) + return fs_param_bad_value(log, param); + c = __lookup_constant(p->data, param->string); + if (!c) + return fs_param_bad_value(log, param); + result->uint_32 = c->value; + return 0; +} +EXPORT_SYMBOL(fs_param_is_enum); + +int fs_param_is_string(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + if (param->type != fs_value_is_string || !*param->string) + return fs_param_bad_value(log, param); + return 0; +} +EXPORT_SYMBOL(fs_param_is_string); + +int fs_param_is_blob(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + if (param->type != fs_value_is_blob) + return fs_param_bad_value(log, param); + return 0; +} +EXPORT_SYMBOL(fs_param_is_blob); + +int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + switch (param->type) { + case fs_value_is_string: + if (kstrtouint(param->string, 0, &result->uint_32) < 0) + break; + if (result->uint_32 <= INT_MAX) + return 0; + break; + case fs_value_is_file: + result->uint_32 = param->dirfd; + if (result->uint_32 <= INT_MAX) + return 0; + break; + default: + break; + } + return fs_param_bad_value(log, param); +} +EXPORT_SYMBOL(fs_param_is_fd); + +int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + return 0; +} +EXPORT_SYMBOL(fs_param_is_blockdev); + +int fs_param_is_path(struct p_log *log, const struct fs_parameter_spec *p, + struct fs_parameter *param, struct fs_parse_result *result) +{ + return 0; +} +EXPORT_SYMBOL(fs_param_is_path); + #ifdef CONFIG_VALIDATE_FS_PARSER /** * validate_constant_table - Validate a constant table @@ -357,102 +362,26 @@ bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, * fs_validate_description - Validate a parameter description * @desc: The parameter description to validate. */ -bool fs_validate_description(const struct fs_parameter_description *desc) +bool fs_validate_description(const char *name, + const struct fs_parameter_spec *desc) { const struct fs_parameter_spec *param, *p2; - const struct fs_parameter_enum *e; - const char *name = desc->name; - unsigned int nr_params = 0; - bool good = true, enums = false; + bool good = true; pr_notice("*** VALIDATE %s ***\n", name); - if (!name[0]) { - pr_err("VALIDATE Parser: No name\n"); - name = "Unknown"; - good = false; - } - - if (desc->specs) { - for (param = desc->specs; param->name; param++) { - enum fs_parameter_type t = param->type; - - /* Check that the type is in range */ - if (t == __fs_param_wasnt_defined || - t >= nr__fs_parameter_type) { - pr_err("VALIDATE %s: PARAM[%s] Bad type %u\n", - name, param->name, t); - good = false; - } else if (t == fs_param_is_enum) { - enums = true; - } - - /* Check for duplicate parameter names */ - for (p2 = desc->specs; p2 < param; p2++) { - if (strcmp(param->name, p2->name) == 0) { - pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n", - name, param->name); - good = false; - } - } - } - - nr_params = param - desc->specs; - } - - if (desc->enums) { - if (!nr_params) { - pr_err("VALIDATE %s: Enum table but no parameters\n", - name); - good = false; - goto no_enums; - } - if (!enums) { - pr_err("VALIDATE %s: Enum table but no enum-type values\n", - name); - good = false; - goto no_enums; - } - - for (e = desc->enums; e->name[0]; e++) { - /* Check that all entries in the enum table have at - * least one parameter that uses them. - */ - for (param = desc->specs; param->name; param++) { - if (param->opt == e->opt && - param->type != fs_param_is_enum) { - pr_err("VALIDATE %s: e[%tu] enum val for %s\n", - name, e - desc->enums, param->name); - good = false; - } - } - } - - /* Check that all enum-type parameters have at least one enum - * value in the enum table. - */ - for (param = desc->specs; param->name; param++) { - if (param->type != fs_param_is_enum) - continue; - for (e = desc->enums; e->name[0]; e++) - if (e->opt == param->opt) - break; - if (!e->name[0]) { - pr_err("VALIDATE %s: PARAM[%s] enum with no values\n", + for (param = desc; param->name; param++) { + /* Check for duplicate parameter names */ + for (p2 = desc; p2 < param; p2++) { + if (strcmp(param->name, p2->name) == 0) { + if (is_flag(param) != is_flag(p2)) + continue; + pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n", name, param->name); good = false; } } - } else { - if (enums) { - pr_err("VALIDATE %s: enum-type values, but no enum table\n", - name); - good = false; - goto no_enums; - } } - -no_enums: return good; } #endif /* CONFIG_VALIDATE_FS_PARSER */ diff --git a/fs/fsopen.c b/fs/fsopen.c index 043ffa8dc263..2fa3f241b762 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -25,7 +25,7 @@ static ssize_t fscontext_read(struct file *file, char __user *_buf, size_t len, loff_t *pos) { struct fs_context *fc = file->private_data; - struct fc_log *log = fc->log; + struct fc_log *log = fc->log.log; unsigned int logsize = ARRAY_SIZE(log->buffer); ssize_t ret; char *p; @@ -97,11 +97,11 @@ static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags) static int fscontext_alloc_log(struct fs_context *fc) { - fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL); - if (!fc->log) + fc->log.log = kzalloc(sizeof(*fc->log.log), GFP_KERNEL); + if (!fc->log.log) return -ENOMEM; - refcount_set(&fc->log->usage, 1); - fc->log->owner = fc->fs_type->owner; + refcount_set(&fc->log.log->usage, 1); + fc->log.log->owner = fc->fs_type->owner; return 0; } @@ -321,6 +321,7 @@ SYSCALL_DEFINE5(fsconfig, struct fs_context *fc; struct fd f; int ret; + int lookup_flags = 0; struct fs_parameter param = { .type = fs_value_is_undefined, @@ -409,19 +410,12 @@ SYSCALL_DEFINE5(fsconfig, goto out_key; } break; + case FSCONFIG_SET_PATH_EMPTY: + lookup_flags = LOOKUP_EMPTY; + /* fallthru */ case FSCONFIG_SET_PATH: param.type = fs_value_is_filename; - param.name = getname_flags(_value, 0, NULL); - if (IS_ERR(param.name)) { - ret = PTR_ERR(param.name); - goto out_key; - } - param.dirfd = aux; - param.size = strlen(param.name->name); - break; - case FSCONFIG_SET_PATH_EMPTY: - param.type = fs_value_is_filename_empty; - param.name = getname_flags(_value, LOOKUP_EMPTY, NULL); + param.name = getname_flags(_value, lookup_flags, NULL); if (IS_ERR(param.name)) { ret = PTR_ERR(param.name); goto out_key; diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 00015d851382..030f094910c3 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -451,8 +451,8 @@ static int cuse_send_init(struct cuse_conn *cc) ap->args.out_args[0].size = sizeof(ia->out); ap->args.out_args[0].value = &ia->out; ap->args.out_args[1].size = CUSE_INIT_INFO_MAX; - ap->args.out_argvar = 1; - ap->args.out_pages = 1; + ap->args.out_argvar = true; + ap->args.out_pages = true; ap->num_pages = 1; ap->pages = &ia->page; ap->descs = &ia->desc; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ee190119f45c..de1e2fde60bd 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -818,7 +818,7 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; - if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; if (flags) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ce715380143c..9d67b830fb7a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -803,6 +803,10 @@ static int fuse_do_readpage(struct file *file, struct page *page) attr_ver = fuse_get_attr_version(fc); + /* Don't overflow end offset */ + if (pos + (desc.length - 1) == LLONG_MAX) + desc.length--; + fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); res = fuse_simple_request(fc, &ia.ap.args); if (res < 0) @@ -888,6 +892,14 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) ap->args.out_pages = true; ap->args.page_zeroing = true; ap->args.page_replace = true; + + /* Don't overflow end offset */ + if (pos + (count - 1) == LLONG_MAX) { + count--; + ap->descs[ap->num_pages - 1].length--; + } + WARN_ON((loff_t) (pos + count) < 0); + fuse_read_args_fill(ia, file, pos, count, FUSE_READ); ia->read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { @@ -1397,9 +1409,9 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, } if (write) - ap->args.in_pages = 1; + ap->args.in_pages = true; else - ap->args.out_pages = 1; + ap->args.out_pages = true; *nbytesp = nbytes; @@ -1465,6 +1477,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, } ia = NULL; if (nres < 0) { + iov_iter_revert(iter, nbytes); err = nres; break; } @@ -1473,8 +1486,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, count -= nres; res += nres; pos += nres; - if (nres != nbytes) + if (nres != nbytes) { + iov_iter_revert(iter, nbytes - nres); break; + } if (count) { max_pages = iov_iter_npages(iter, fc->max_pages); ia = fuse_io_alloc(io, max_pages); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 16aec32f7f3d..95d712d44ca1 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -448,7 +448,7 @@ enum { OPT_ERR }; -static const struct fs_parameter_spec fuse_param_specs[] = { +static const struct fs_parameter_spec fuse_fs_parameters[] = { fsparam_string ("source", OPT_SOURCE), fsparam_u32 ("fd", OPT_FD), fsparam_u32oct ("rootmode", OPT_ROOTMODE), @@ -462,68 +462,63 @@ static const struct fs_parameter_spec fuse_param_specs[] = { {} }; -static const struct fs_parameter_description fuse_fs_parameters = { - .name = "fuse", - .specs = fuse_param_specs, -}; - static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct fuse_fs_context *ctx = fc->fs_private; int opt; - opt = fs_parse(fc, &fuse_fs_parameters, param, &result); + opt = fs_parse(fc, fuse_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case OPT_SOURCE: if (fc->source) - return invalf(fc, "fuse: Multiple sources specified"); + return invalfc(fc, "Multiple sources specified"); fc->source = param->string; param->string = NULL; break; case OPT_SUBTYPE: if (ctx->subtype) - return invalf(fc, "fuse: Multiple subtypes specified"); + return invalfc(fc, "Multiple subtypes specified"); ctx->subtype = param->string; param->string = NULL; return 0; case OPT_FD: ctx->fd = result.uint_32; - ctx->fd_present = 1; + ctx->fd_present = true; break; case OPT_ROOTMODE: if (!fuse_valid_type(result.uint_32)) - return invalf(fc, "fuse: Invalid rootmode"); + return invalfc(fc, "Invalid rootmode"); ctx->rootmode = result.uint_32; - ctx->rootmode_present = 1; + ctx->rootmode_present = true; break; case OPT_USER_ID: ctx->user_id = make_kuid(fc->user_ns, result.uint_32); if (!uid_valid(ctx->user_id)) - return invalf(fc, "fuse: Invalid user_id"); - ctx->user_id_present = 1; + return invalfc(fc, "Invalid user_id"); + ctx->user_id_present = true; break; case OPT_GROUP_ID: ctx->group_id = make_kgid(fc->user_ns, result.uint_32); if (!gid_valid(ctx->group_id)) - return invalf(fc, "fuse: Invalid group_id"); - ctx->group_id_present = 1; + return invalfc(fc, "Invalid group_id"); + ctx->group_id_present = true; break; case OPT_DEFAULT_PERMISSIONS: - ctx->default_permissions = 1; + ctx->default_permissions = true; break; case OPT_ALLOW_OTHER: - ctx->allow_other = 1; + ctx->allow_other = true; break; case OPT_MAX_READ: @@ -532,7 +527,7 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) case OPT_BLKSIZE: if (!ctx->is_bdev) - return invalf(fc, "fuse: blksize only supported for fuseblk"); + return invalfc(fc, "blksize only supported for fuseblk"); ctx->blksize = result.uint_32; break; @@ -997,7 +992,7 @@ void fuse_send_init(struct fuse_conn *fc) /* Variable length argument used for backward compatibility with interface version < 7.5. Rest of init_out is zeroed by do_get_request(), so a short reply is not a problem */ - ia->args.out_argvar = 1; + ia->args.out_argvar = true; ia->args.out_args[0].size = sizeof(ia->out); ia->args.out_args[0].value = &ia->out; ia->args.force = true; @@ -1347,7 +1342,7 @@ static struct file_system_type fuse_fs_type = { .name = "fuse", .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, .init_fs_context = fuse_init_fs_context, - .parameters = &fuse_fs_parameters, + .parameters = fuse_fs_parameters, .kill_sb = fuse_kill_sb_anon, }; MODULE_ALIAS_FS("fuse"); @@ -1363,7 +1358,7 @@ static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .init_fs_context = fuse_init_fs_context, - .parameters = &fuse_fs_parameters, + .parameters = fuse_fs_parameters, .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 6a40f75a0d25..90e3f01bd796 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -332,7 +332,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx) return -ENOMEM; plus = fuse_use_readdirplus(inode, ctx); - ap->args.out_pages = 1; + ap->args.out_pages = true; ap->num_pages = 1; ap->pages = &page; ap->descs = &desc; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9d58295ccf7a..cb26be6f4351 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -847,7 +847,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct gfs2_inode *ip = GFS2_I(inode); - ssize_t written = 0, ret; + ssize_t ret; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -867,68 +867,58 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret <= 0) - goto out; - - /* We can write back this queue in page reclaim */ - current->backing_dev_info = inode_to_bdi(inode); + goto out_unlock; ret = file_remove_privs(file); if (ret) - goto out2; + goto out_unlock; ret = file_update_time(file); if (ret) - goto out2; + goto out_unlock; if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; - loff_t pos, endbyte; - ssize_t buffered; + ssize_t buffered, ret2; - written = gfs2_file_direct_write(iocb, from); - if (written < 0 || !iov_iter_count(from)) - goto out2; + ret = gfs2_file_direct_write(iocb, from); + if (ret < 0 || !iov_iter_count(from)) + goto out_unlock; - ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); - if (unlikely(ret < 0)) - goto out2; - buffered = ret; + iocb->ki_flags |= IOCB_DSYNC; + current->backing_dev_info = inode_to_bdi(inode); + buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + current->backing_dev_info = NULL; + if (unlikely(buffered <= 0)) + goto out_unlock; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT - * semantics. + * semantics. If the writeback or invalidate fails, only report + * the direct I/O range as we don't know if the buffered pages + * made it to disk. */ - pos = iocb->ki_pos; - endbyte = pos + buffered - 1; - ret = filemap_write_and_wait_range(mapping, pos, endbyte); - if (!ret) { - iocb->ki_pos += buffered; - written += buffered; - invalidate_mapping_pages(mapping, - pos >> PAGE_SHIFT, - endbyte >> PAGE_SHIFT); - } else { - /* - * We don't know how much we wrote, so just return - * the number of bytes which were direct-written - */ - } + iocb->ki_pos += buffered; + ret2 = generic_write_sync(iocb, buffered); + invalidate_mapping_pages(mapping, + (iocb->ki_pos - buffered) >> PAGE_SHIFT, + (iocb->ki_pos - 1) >> PAGE_SHIFT); + if (!ret || ret2 > 0) + ret += ret2; } else { + current->backing_dev_info = inode_to_bdi(inode); ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); - if (likely(ret > 0)) + current->backing_dev_info = NULL; + if (likely(ret > 0)) { iocb->ki_pos += ret; + ret = generic_write_sync(iocb, ret); + } } -out2: - current->backing_dev_info = NULL; -out: +out_unlock: inode_unlock(inode); - if (likely(ret > 0)) { - /* Handle various SYNC-type writes */ - ret = generic_write_sync(iocb, ret); - } - return written ? written : ret; + return ret; } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index d9431724b788..c090d5ad3f22 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -422,7 +422,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { - if (lh.lh_sequence > head->lh_sequence) + if (lh.lh_sequence >= head->lh_sequence) *head = lh; else { ret = true; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b3e904bcc02c..a1a8ef7ed3fd 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1252,6 +1252,7 @@ enum gfs2_param { Opt_upgrade, Opt_acl, Opt_quota, + Opt_quota_flag, Opt_suiddir, Opt_data, Opt_meta, @@ -1266,17 +1267,11 @@ enum gfs2_param { Opt_loccookie, }; -enum opt_quota { - Opt_quota_unset = 0, - Opt_quota_off, - Opt_quota_account, - Opt_quota_on, -}; - -static const unsigned int opt_quota_values[] = { - [Opt_quota_off] = GFS2_QUOTA_OFF, - [Opt_quota_account] = GFS2_QUOTA_ACCOUNT, - [Opt_quota_on] = GFS2_QUOTA_ON, +static const struct constant_table gfs2_param_quota[] = { + {"off", GFS2_QUOTA_OFF}, + {"account", GFS2_QUOTA_ACCOUNT}, + {"on", GFS2_QUOTA_ON}, + {} }; enum opt_data { @@ -1284,12 +1279,24 @@ enum opt_data { Opt_data_ordered = GFS2_DATA_ORDERED, }; +static const struct constant_table gfs2_param_data[] = { + {"writeback", Opt_data_writeback }, + {"ordered", Opt_data_ordered }, + {} +}; + enum opt_errors { Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, Opt_errors_panic = GFS2_ERRORS_PANIC, }; -static const struct fs_parameter_spec gfs2_param_specs[] = { +static const struct constant_table gfs2_param_errors[] = { + {"withdraw", Opt_errors_withdraw }, + {"panic", Opt_errors_panic }, + {} +}; + +static const struct fs_parameter_spec gfs2_fs_parameters[] = { fsparam_string ("lockproto", Opt_lockproto), fsparam_string ("locktable", Opt_locktable), fsparam_string ("hostdata", Opt_hostdata), @@ -1302,11 +1309,11 @@ static const struct fs_parameter_spec gfs2_param_specs[] = { fsparam_flag ("upgrade", Opt_upgrade), fsparam_flag_no("acl", Opt_acl), fsparam_flag_no("suiddir", Opt_suiddir), - fsparam_enum ("data", Opt_data), + fsparam_enum ("data", Opt_data, gfs2_param_data), fsparam_flag ("meta", Opt_meta), fsparam_flag_no("discard", Opt_discard), fsparam_s32 ("commit", Opt_commit), - fsparam_enum ("errors", Opt_errors), + fsparam_enum ("errors", Opt_errors, gfs2_param_errors), fsparam_s32 ("statfs_quantum", Opt_statfs_quantum), fsparam_s32 ("statfs_percent", Opt_statfs_percent), fsparam_s32 ("quota_quantum", Opt_quota_quantum), @@ -1314,27 +1321,11 @@ static const struct fs_parameter_spec gfs2_param_specs[] = { fsparam_flag_no("rgrplvb", Opt_rgrplvb), fsparam_flag_no("loccookie", Opt_loccookie), /* quota can be a flag or an enum so it gets special treatment */ - __fsparam(fs_param_is_enum, "quota", Opt_quota, fs_param_neg_with_no|fs_param_v_optional), - {} -}; - -static const struct fs_parameter_enum gfs2_param_enums[] = { - { Opt_quota, "off", Opt_quota_off }, - { Opt_quota, "account", Opt_quota_account }, - { Opt_quota, "on", Opt_quota_on }, - { Opt_data, "writeback", Opt_data_writeback }, - { Opt_data, "ordered", Opt_data_ordered }, - { Opt_errors, "withdraw", Opt_errors_withdraw }, - { Opt_errors, "panic", Opt_errors_panic }, + fsparam_flag_no("quota", Opt_quota_flag), + fsparam_enum("quota", Opt_quota, gfs2_param_quota), {} }; -static const struct fs_parameter_description gfs2_fs_parameters = { - .name = "gfs2", - .specs = gfs2_param_specs, - .enums = gfs2_param_enums, -}; - /* Parse a single mount parameter */ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) { @@ -1342,7 +1333,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) struct fs_parse_result result; int o; - o = fs_parse(fc, &gfs2_fs_parameters, param, &result); + o = fs_parse(fc, gfs2_fs_parameters, param, &result); if (o < 0) return o; @@ -1370,7 +1361,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) break; case Opt_debug: if (result.boolean && args->ar_errors == GFS2_ERRORS_PANIC) - return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive"); + return invalfc(fc, "-o debug and -o errors=panic are mutually exclusive"); args->ar_debug = result.boolean; break; case Opt_upgrade: @@ -1379,17 +1370,11 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_acl: args->ar_posix_acl = result.boolean; break; + case Opt_quota_flag: + args->ar_quota = result.negated ? GFS2_QUOTA_OFF : GFS2_QUOTA_ON; + break; case Opt_quota: - /* The quota option can be a flag or an enum. A non-zero int_32 - result means that we have an enum index. Otherwise we have - to rely on the 'negated' flag to tell us whether 'quota' or - 'noquota' was specified. */ - if (result.negated) - args->ar_quota = GFS2_QUOTA_OFF; - else if (result.int_32 > 0) - args->ar_quota = opt_quota_values[result.int_32]; - else - args->ar_quota = GFS2_QUOTA_ON; + args->ar_quota = result.int_32; break; case Opt_suiddir: args->ar_suiddir = result.boolean; @@ -1406,27 +1391,27 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) break; case Opt_commit: if (result.int_32 <= 0) - return invalf(fc, "gfs2: commit mount option requires a positive numeric argument"); + return invalfc(fc, "commit mount option requires a positive numeric argument"); args->ar_commit = result.int_32; break; case Opt_statfs_quantum: if (result.int_32 < 0) - return invalf(fc, "gfs2: statfs_quantum mount option requires a non-negative numeric argument"); + return invalfc(fc, "statfs_quantum mount option requires a non-negative numeric argument"); args->ar_statfs_quantum = result.int_32; break; case Opt_quota_quantum: if (result.int_32 <= 0) - return invalf(fc, "gfs2: quota_quantum mount option requires a positive numeric argument"); + return invalfc(fc, "quota_quantum mount option requires a positive numeric argument"); args->ar_quota_quantum = result.int_32; break; case Opt_statfs_percent: if (result.int_32 < 0 || result.int_32 > 100) - return invalf(fc, "gfs2: statfs_percent mount option requires a numeric argument between 0 and 100"); + return invalfc(fc, "statfs_percent mount option requires a numeric argument between 0 and 100"); args->ar_statfs_percent = result.int_32; break; case Opt_errors: if (args->ar_debug && result.uint_32 == GFS2_ERRORS_PANIC) - return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive"); + return invalfc(fc, "-o debug and -o errors=panic are mutually exclusive"); args->ar_errors = result.uint_32; break; case Opt_barrier: @@ -1439,7 +1424,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) args->ar_loccookie = result.boolean; break; default: - return invalf(fc, "gfs2: invalid mount option: %s", param->key); + return invalfc(fc, "invalid mount option: %s", param->key); } return 0; } @@ -1465,27 +1450,27 @@ static int gfs2_reconfigure(struct fs_context *fc) spin_unlock(>->gt_spin); if (strcmp(newargs->ar_lockproto, oldargs->ar_lockproto)) { - errorf(fc, "gfs2: reconfiguration of locking protocol not allowed"); + errorfc(fc, "reconfiguration of locking protocol not allowed"); return -EINVAL; } if (strcmp(newargs->ar_locktable, oldargs->ar_locktable)) { - errorf(fc, "gfs2: reconfiguration of lock table not allowed"); + errorfc(fc, "reconfiguration of lock table not allowed"); return -EINVAL; } if (strcmp(newargs->ar_hostdata, oldargs->ar_hostdata)) { - errorf(fc, "gfs2: reconfiguration of host data not allowed"); + errorfc(fc, "reconfiguration of host data not allowed"); return -EINVAL; } if (newargs->ar_spectator != oldargs->ar_spectator) { - errorf(fc, "gfs2: reconfiguration of spectator mode not allowed"); + errorfc(fc, "reconfiguration of spectator mode not allowed"); return -EINVAL; } if (newargs->ar_localflocks != oldargs->ar_localflocks) { - errorf(fc, "gfs2: reconfiguration of localflocks not allowed"); + errorfc(fc, "reconfiguration of localflocks not allowed"); return -EINVAL; } if (newargs->ar_meta != oldargs->ar_meta) { - errorf(fc, "gfs2: switching between gfs2 and gfs2meta not allowed"); + errorfc(fc, "switching between gfs2 and gfs2meta not allowed"); return -EINVAL; } if (oldargs->ar_spectator) @@ -1495,11 +1480,11 @@ static int gfs2_reconfigure(struct fs_context *fc) if (fc->sb_flags & SB_RDONLY) { error = gfs2_make_fs_ro(sdp); if (error) - errorf(fc, "gfs2: unable to remount read-only"); + errorfc(fc, "unable to remount read-only"); } else { error = gfs2_make_fs_rw(sdp); if (error) - errorf(fc, "gfs2: unable to remount read-write"); + errorfc(fc, "unable to remount read-write"); } } sdp->sd_args = *newargs; @@ -1644,7 +1629,7 @@ struct file_system_type gfs2_fs_type = { .name = "gfs2", .fs_flags = FS_REQUIRES_DEV, .init_fs_context = gfs2_init_fs_context, - .parameters = &gfs2_fs_parameters, + .parameters = gfs2_fs_parameters, .kill_sb = gfs2_kill_sb, .owner = THIS_MODULE, }; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a66e425884d1..aff8642f0c2e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -73,7 +73,7 @@ enum hugetlb_param { Opt_uid, }; -static const struct fs_parameter_spec hugetlb_param_specs[] = { +static const struct fs_parameter_spec hugetlb_fs_parameters[] = { fsparam_u32 ("gid", Opt_gid), fsparam_string("min_size", Opt_min_size), fsparam_u32 ("mode", Opt_mode), @@ -84,11 +84,6 @@ static const struct fs_parameter_spec hugetlb_param_specs[] = { {} }; -static const struct fs_parameter_description hugetlb_fs_parameters = { - .name = "hugetlbfs", - .specs = hugetlb_param_specs, -}; - #ifdef CONFIG_NUMA static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma, struct inode *inode, pgoff_t index) @@ -1171,7 +1166,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par unsigned long ps; int opt; - opt = fs_parse(fc, &hugetlb_fs_parameters, param, &result); + opt = fs_parse(fc, hugetlb_fs_parameters, param, &result); if (opt < 0) return opt; @@ -1233,7 +1228,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par } bad_val: - return invalf(fc, "hugetlbfs: Bad value '%s' for mount option '%s'\n", + return invalfc(fc, "Bad value '%s' for mount option '%s'\n", param->string, param->key); } @@ -1358,7 +1353,7 @@ static int hugetlbfs_init_fs_context(struct fs_context *fc) static struct file_system_type hugetlbfs_fs_type = { .name = "hugetlbfs", .init_fs_context = hugetlbfs_init_fs_context, - .parameters = &hugetlb_fs_parameters, + .parameters = hugetlb_fs_parameters, .kill_sb = kill_litter_super, }; diff --git a/fs/inode.c b/fs/inode.c index c7418b0b4168..7d57068b6b7a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1599,25 +1599,31 @@ retry: } EXPORT_SYMBOL(iput); +#ifdef CONFIG_BLOCK /** * bmap - find a block number in a file - * @inode: inode of file - * @block: block to find - * - * Returns the block number on the device holding the inode that - * is the disk block number for the block of the file requested. - * That is, asked for block 4 of inode 1 the function will return the - * disk block relative to the disk start that holds that block of the - * file. + * @inode: inode owning the block number being requested + * @block: pointer containing the block to find + * + * Replaces the value in *block with the block number on the device holding + * corresponding to the requested block number in the file. + * That is, asked for block 4 of inode 1 the function will replace the + * 4 in *block, with disk block relative to the disk start that holds that + * block of the file. + * + * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a + * hole, returns 0 and *block is also set to 0. */ -sector_t bmap(struct inode *inode, sector_t block) +int bmap(struct inode *inode, sector_t *block) { - sector_t res = 0; - if (inode->i_mapping->a_ops->bmap) - res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block); - return res; + if (!inode->i_mapping->a_ops->bmap) + return -EINVAL; + + *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block); + return 0; } EXPORT_SYMBOL(bmap); +#endif /* * With relative atime, only update atime if the previous atime is diff --git a/fs/ioctl.c b/fs/ioctl.c index 7c9a5df5a597..282d45be6f45 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -54,19 +54,32 @@ EXPORT_SYMBOL(vfs_ioctl); static int ioctl_fibmap(struct file *filp, int __user *p) { - struct address_space *mapping = filp->f_mapping; - int res, block; + struct inode *inode = file_inode(filp); + int error, ur_block; + sector_t block; - /* do we support this mess? */ - if (!mapping->a_ops->bmap) - return -EINVAL; if (!capable(CAP_SYS_RAWIO)) return -EPERM; - res = get_user(block, p); - if (res) - return res; - res = mapping->a_ops->bmap(mapping, block); - return put_user(res, p); + + error = get_user(ur_block, p); + if (error) + return error; + + if (ur_block < 0) + return -EINVAL; + + block = ur_block; + error = bmap(inode, &block); + + if (error) + ur_block = 0; + else + ur_block = block; + + if (put_user(ur_block, p)) + error = -EFAULT; + + return error; } /** @@ -523,13 +536,9 @@ static int compat_ioctl_preallocate(struct file *file, int mode, static int file_ioctl(struct file *filp, unsigned int cmd, int __user *p) { - struct inode *inode = file_inode(filp); - switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); - case FIONREAD: - return put_user(i_size_read(inode) - filp->f_pos, p); case FS_IOC_RESVSP: case FS_IOC_RESVSP64: return ioctl_preallocate(filp, 0, p); @@ -721,6 +730,13 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd, case FIDEDUPERANGE: return ioctl_file_dedupe_range(filp, argp); + case FIONREAD: + if (!S_ISREG(inode->i_mode)) + return vfs_ioctl(filp, cmd, arg); + + return put_user(i_size_read(inode) - filp->f_pos, + (int __user *)argp); + default: if (S_ISREG(inode->i_mode)) return file_ioctl(filp, cmd, argp); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index eb8ca446d1ab..a49d0e670ddf 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -794,18 +794,22 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, { int err = 0; unsigned long long ret; + sector_t block = 0; if (journal->j_inode) { - ret = bmap(journal->j_inode, blocknr); - if (ret) - *retp = ret; - else { + block = blocknr; + ret = bmap(journal->j_inode, &block); + + if (ret || !block) { printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", __func__, blocknr, journal->j_devname); err = -EIO; jbd2_journal_abort(journal, err); + } else { + *retp = block; } + } else { *retp = blocknr; /* +journal->j_blk_offset */ } @@ -1243,11 +1247,14 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev, journal_t *jbd2_journal_init_inode(struct inode *inode) { journal_t *journal; + sector_t blocknr; char *p; - unsigned long long blocknr; + int err = 0; + + blocknr = 0; + err = bmap(inode, &blocknr); - blocknr = bmap(inode, 0); - if (!blocknr) { + if (err || !blocknr) { pr_err("%s: Cannot locate journal superblock\n", __func__); return NULL; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 0e6406c4f362..05d7878dfad1 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -167,27 +167,21 @@ enum { Opt_rp_size, }; -static const struct fs_parameter_spec jffs2_param_specs[] = { - fsparam_enum ("compr", Opt_override_compr), - fsparam_u32 ("rp_size", Opt_rp_size), - {} -}; - -static const struct fs_parameter_enum jffs2_param_enums[] = { - { Opt_override_compr, "none", JFFS2_COMPR_MODE_NONE }, +static const struct constant_table jffs2_param_compr[] = { + {"none", JFFS2_COMPR_MODE_NONE }, #ifdef CONFIG_JFFS2_LZO - { Opt_override_compr, "lzo", JFFS2_COMPR_MODE_FORCELZO }, + {"lzo", JFFS2_COMPR_MODE_FORCELZO }, #endif #ifdef CONFIG_JFFS2_ZLIB - { Opt_override_compr, "zlib", JFFS2_COMPR_MODE_FORCEZLIB }, + {"zlib", JFFS2_COMPR_MODE_FORCEZLIB }, #endif {} }; -const struct fs_parameter_description jffs2_fs_parameters = { - .name = "jffs2", - .specs = jffs2_param_specs, - .enums = jffs2_param_enums, +static const struct fs_parameter_spec jffs2_fs_parameters[] = { + fsparam_enum ("compr", Opt_override_compr, jffs2_param_compr), + fsparam_u32 ("rp_size", Opt_rp_size), + {} }; static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) @@ -196,7 +190,7 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) struct jffs2_sb_info *c = fc->s_fs_info; int opt; - opt = fs_parse(fc, &jffs2_fs_parameters, param, &result); + opt = fs_parse(fc, jffs2_fs_parameters, param, &result); if (opt < 0) return opt; @@ -339,7 +333,7 @@ static struct file_system_type jffs2_fs_type = { .owner = THIS_MODULE, .name = "jffs2", .init_fs_context = jffs2_init_fs_context, - .parameters = &jffs2_fs_parameters, + .parameters = jffs2_fs_parameters, .kill_sb = jffs2_kill_sb, }; MODULE_ALIAS_FS("jffs2"); diff --git a/fs/namespace.c b/fs/namespace.c index 5e1bf611a9eb..85b5f7bea82e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2979,39 +2979,10 @@ static void shrink_submounts(struct mount *mnt) } } -/* - * Some copy_from_user() implementations do not return the exact number of - * bytes remaining to copy on a fault. But copy_mount_options() requires that. - * Note that this function differs from copy_from_user() in that it will oops - * on bad values of `to', rather than returning a short copy. - */ -static long exact_copy_from_user(void *to, const void __user * from, - unsigned long n) -{ - char *t = to; - const char __user *f = from; - char c; - - if (!access_ok(from, n)) - return n; - - while (n) { - if (__get_user(c, f)) { - memset(t, 0, n); - break; - } - *t++ = c; - f++; - n--; - } - return n; -} - void *copy_mount_options(const void __user * data) { - int i; - unsigned long size; char *copy; + unsigned size; if (!data) return NULL; @@ -3020,22 +2991,16 @@ void *copy_mount_options(const void __user * data) if (!copy) return ERR_PTR(-ENOMEM); - /* We only care that *some* data at the address the user - * gave us is valid. Just in case, we'll zero - * the remainder of the page. - */ - /* copy_from_user cannot cross TASK_SIZE ! */ - size = TASK_SIZE - (unsigned long)untagged_addr(data); - if (size > PAGE_SIZE) - size = PAGE_SIZE; + size = PAGE_SIZE - offset_in_page(data); - i = size - exact_copy_from_user(copy, data, size); - if (!i) { + if (copy_from_user(copy, data, size)) { kfree(copy); return ERR_PTR(-EFAULT); } - if (i != PAGE_SIZE) - memset(copy + i, 0, PAGE_SIZE - i); + if (size != PAGE_SIZE) { + if (copy_from_user(copy + size, data + size, PAGE_SIZE - size)) + memset(copy + size, 0, PAGE_SIZE - size); + } return copy; } diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 295a7a21b774..40b6c5ac46c0 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -90,7 +90,7 @@ config NFS_V4 config NFS_SWAP bool "Provide swap over NFS support" default n - depends on NFS_FS + depends on NFS_FS && SWAP select SUNRPC_SWAP help This option enables swapon to work on files located on NFS mounts. @@ -196,3 +196,12 @@ config NFS_DEBUG depends on NFS_FS && SUNRPC_DEBUG select CRC32 default y + +config NFS_DISABLE_UDP_SUPPORT + bool "NFS: Disable NFS UDP protocol support" + depends on NFS_FS + default y + help + Choose Y here to disable the use of NFS over UDP. NFS over UDP + on modern networks (1Gb+) can lead to data corruption caused by + fragmentation during high loads. diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 34cdeaecccf6..2433c3e03cfa 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,7 +9,7 @@ CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ io.o direct.o pagelist.o read.o symlink.o unlink.o \ write.o namespace.o mount_clnt.o nfstrace.o \ - export.o sysfs.o + export.o sysfs.o fs_context.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 03a20f5716c7..79ff172eb1c8 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -18,6 +18,7 @@ #include "callback.h" #include "internal.h" #include "nfs4session.h" +#include "nfs4trace.h" #define CB_OP_TAGLEN_MAXSZ (512) #define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status @@ -946,9 +947,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) if (hdr_arg.minorversion == 0) { cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident); - if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) { - if (cps.clp) - nfs_put_client(cps.clp); + if (!cps.clp) { + trace_nfs_cb_no_clp(rqstp->rq_xid, hdr_arg.cb_ident); + goto out_invalidcred; + } + if (!check_gss_callback_principal(cps.clp, rqstp)) { + trace_nfs_cb_badprinc(rqstp->rq_xid, hdr_arg.cb_ident); + nfs_put_client(cps.clp); goto out_invalidcred; } } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 02110a30a49e..989c30c98511 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -474,6 +474,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_maxval = to->to_initval; to->to_exponential = 0; break; +#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT case XPRT_TRANSPORT_UDP: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_UDP_RETRANS; @@ -484,6 +485,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_maxval = NFS_MAX_UDP_TIMEOUT; to->to_exponential = 1; break; +#endif default: BUG(); } @@ -580,8 +582,10 @@ static int nfs_start_lockd(struct nfs_server *server) default: nlm_init.protocol = IPPROTO_TCP; break; +#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT case XPRT_TRANSPORT_UDP: nlm_init.protocol = IPPROTO_UDP; +#endif } host = nlmclnt_init(&nlm_init); @@ -658,28 +662,28 @@ EXPORT_SYMBOL_GPL(nfs_init_client); * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data, - struct nfs_subversion *nfs_mod) + const struct fs_context *fc) { + const struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; struct nfs_client_initdata cl_init = { - .hostname = data->nfs_server.hostname, - .addr = (const struct sockaddr *)&data->nfs_server.address, - .addrlen = data->nfs_server.addrlen, - .nfs_mod = nfs_mod, - .proto = data->nfs_server.protocol, - .net = data->net, + .hostname = ctx->nfs_server.hostname, + .addr = (const struct sockaddr *)&ctx->nfs_server.address, + .addrlen = ctx->nfs_server.addrlen, + .nfs_mod = ctx->nfs_mod, + .proto = ctx->nfs_server.protocol, + .net = fc->net_ns, .timeparms = &timeparms, .cred = server->cred, - .nconnect = data->nfs_server.nconnect, + .nconnect = ctx->nfs_server.nconnect, .init_flags = (1UL << NFS_CS_REUSEPORT), }; struct nfs_client *clp; int error; - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); - if (data->flags & NFS_MOUNT_NORESVPORT) + nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, + ctx->timeo, ctx->retrans); + if (ctx->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); /* Allocate or find a client reference we can use */ @@ -690,46 +694,46 @@ static int nfs_init_server(struct nfs_server *server, server->nfs_client = clp; /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->options = data->options; + server->flags = ctx->flags; + server->options = ctx->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); + if (ctx->rsize) + server->rsize = nfs_block_size(ctx->rsize, NULL); + if (ctx->wsize) + server->wsize = nfs_block_size(ctx->wsize, NULL); - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; + server->acregmin = ctx->acregmin * HZ; + server->acregmax = ctx->acregmax * HZ; + server->acdirmin = ctx->acdirmin * HZ; + server->acdirmax = ctx->acdirmax * HZ; /* Start lockd here, before we might error out */ error = nfs_start_lockd(server); if (error < 0) goto error; - server->port = data->nfs_server.port; - server->auth_info = data->auth_info; + server->port = ctx->nfs_server.port; + server->auth_info = ctx->auth_info; error = nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); + ctx->selected_flavor); if (error < 0) goto error; /* Preserve the values of mount_server-related mount options */ - if (data->mount_server.addrlen) { - memcpy(&server->mountd_address, &data->mount_server.address, - data->mount_server.addrlen); - server->mountd_addrlen = data->mount_server.addrlen; + if (ctx->mount_server.addrlen) { + memcpy(&server->mountd_address, &ctx->mount_server.address, + ctx->mount_server.addrlen); + server->mountd_addrlen = ctx->mount_server.addrlen; } - server->mountd_version = data->mount_server.version; - server->mountd_port = data->mount_server.port; - server->mountd_protocol = data->mount_server.protocol; + server->mountd_version = ctx->mount_server.version; + server->mountd_port = ctx->mount_server.port; + server->mountd_protocol = ctx->mount_server.protocol; - server->namelen = data->namlen; + server->namelen = ctx->namlen; return 0; error: @@ -951,9 +955,9 @@ EXPORT_SYMBOL_GPL(nfs_free_server); * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs_create_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_server *server; struct nfs_fattr *fattr; int error; @@ -970,18 +974,18 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, goto error; /* Get a client representation */ - error = nfs_init_server(server, mount_info->parsed, nfs_mod); + error = nfs_init_server(server, fc); if (error < 0) goto error; /* Probe the root fh to retrieve its FSID */ - error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr); + error = nfs_probe_fsinfo(server, ctx->mntfh, fattr); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS)) + if (!(ctx->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) @@ -989,8 +993,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, - fattr, NULL, NULL); + error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, + fattr, NULL, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index fe57b2b5314a..4a841071d8a7 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -25,13 +25,32 @@ #include "internal.h" #include "nfs4trace.h" -static void nfs_free_delegation(struct nfs_delegation *delegation) +#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U) + +static atomic_long_t nfs_active_delegations; +static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; + +static void __nfs_free_delegation(struct nfs_delegation *delegation) { put_cred(delegation->cred); delegation->cred = NULL; kfree_rcu(delegation, rcu); } +static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation) +{ + if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; + atomic_long_dec(&nfs_active_delegations); + } +} + +static void nfs_free_delegation(struct nfs_delegation *delegation) +{ + nfs_mark_delegation_revoked(delegation); + __nfs_free_delegation(delegation); +} + /** * nfs_mark_delegation_referenced - set delegation's REFERENCED flag * @delegation: delegation to process @@ -343,7 +362,8 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, delegation->stateid.seqid = update->stateid.seqid; smp_wmb(); delegation->type = update->type; - clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + atomic_long_inc(&nfs_active_delegations); } } @@ -423,6 +443,8 @@ add_new: rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; + atomic_long_inc(&nfs_active_delegations); + trace_nfs4_set_delegation(inode, type); spin_lock(&inode->i_lock); @@ -432,7 +454,7 @@ add_new: out: spin_unlock(&clp->cl_lock); if (delegation != NULL) - nfs_free_delegation(delegation); + __nfs_free_delegation(delegation); if (freeme != NULL) { nfs_do_return_delegation(inode, freeme, 0); nfs_free_delegation(freeme); @@ -479,7 +501,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; - if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { + else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) { struct inode *inode; spin_lock(&delegation->lock); @@ -488,6 +510,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) ret = true; spin_unlock(&delegation->lock); } + if (ret) + clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) ret = false; @@ -607,6 +631,7 @@ void nfs_inode_evict_delegation(struct inode *inode) delegation = nfs_inode_detach_delegation(inode); if (delegation != NULL) { + set_bit(NFS_DELEGATION_RETURNING, &delegation->flags); set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); nfs_do_return_delegation(inode, delegation, 1); nfs_free_delegation(delegation); @@ -637,6 +662,40 @@ int nfs4_inode_return_delegation(struct inode *inode) } /** + * nfs_inode_return_delegation_on_close - asynchronously return a delegation + * @inode: inode to process + * + * This routine is called on file close in order to determine if the + * inode delegation needs to be returned immediately. + */ +void nfs4_inode_return_delegation_on_close(struct inode *inode) +{ + struct nfs_delegation *delegation; + struct nfs_delegation *ret = NULL; + + if (!inode) + return; + rcu_read_lock(); + delegation = nfs4_get_valid_delegation(inode); + if (!delegation) + goto out; + if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) || + atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) { + spin_lock(&delegation->lock); + if (delegation->inode && + list_empty(&NFS_I(inode)->open_files) && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); + ret = delegation; + } + spin_unlock(&delegation->lock); + } +out: + rcu_read_unlock(); + nfs_end_delegation_return(inode, ret, 0); +} + +/** * nfs4_inode_make_writeable * @inode: pointer to inode * @@ -760,13 +819,6 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } -static void nfs_mark_delegation_revoked(struct nfs_server *server, - struct nfs_delegation *delegation) -{ - set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); - delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; -} - static void nfs_revoke_delegation(struct inode *inode, const nfs4_stateid *stateid) { @@ -794,7 +846,7 @@ static void nfs_revoke_delegation(struct inode *inode, } spin_unlock(&delegation->lock); } - nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + nfs_mark_delegation_revoked(delegation); ret = true; out: rcu_read_unlock(); @@ -833,7 +885,7 @@ void nfs_delegation_mark_returned(struct inode *inode, delegation->stateid.seqid = stateid->seqid; } - nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + nfs_mark_delegation_revoked(delegation); out_clear_returning: clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); @@ -1317,3 +1369,5 @@ out: rcu_read_unlock(); return ret; } + +module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 15d3484be028..31b84604d383 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -42,6 +42,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); int nfs4_inode_return_delegation(struct inode *inode); +void nfs4_inode_return_delegation_on_close(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_evict_delegation(struct inode *inode); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e180033e35cf..1320288ff9ec 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -58,7 +58,7 @@ static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, - .iterate = nfs_readdir, + .iterate_shared = nfs_readdir, .open = nfs_opendir, .release = nfs_closedir, .fsync = nfs_fsync_dir, @@ -162,6 +162,17 @@ typedef struct { bool eof; } nfs_readdir_descriptor_t; +static +void nfs_readdir_init_array(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; + kunmap_atomic(array); +} + /* * we are freeing strings created by nfs_add_to_readdir_array() */ @@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); + array->size = 0; kunmap_atomic(array); } @@ -186,7 +198,7 @@ static int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len) { string->len = len; - string->name = kmemdup(name, len, GFP_KERNEL); + string->name = kmemdup_nul(name, len, GFP_KERNEL); if (string->name == NULL) return -ENOMEM; /* @@ -437,7 +449,8 @@ void nfs_force_use_readdirplus(struct inode *dir) if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && !list_empty(&nfsi->open_files)) { set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); - invalidate_mapping_pages(dir->i_mapping, 0, -1); + invalidate_mapping_pages(dir->i_mapping, + nfsi->page_index + 1, -1); } } @@ -610,6 +623,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); + nfs_readdir_init_array(page); + entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -626,8 +641,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, } array = kmap(page); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); if (status < 0) @@ -682,6 +695,7 @@ int nfs_readdir_filler(void *data, struct page* page) unlock_page(page); return 0; error: + nfs_readdir_clear_array(page); unlock_page(page); return ret; } @@ -689,8 +703,6 @@ int nfs_readdir_filler(void *data, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - if (!desc->page->mapping) - nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -704,19 +716,32 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) /* * Returns 0 if desc->dir_cookie was found on page desc->page_index + * and locks the page to prevent removal from the page cache. */ static -int find_cache_page(nfs_readdir_descriptor_t *desc) +int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) { + struct inode *inode = file_inode(desc->file); + struct nfs_inode *nfsi = NFS_I(inode); int res; desc->page = get_cache_page(desc); if (IS_ERR(desc->page)) return PTR_ERR(desc->page); - - res = nfs_readdir_search_array(desc); + res = lock_page_killable(desc->page); if (res != 0) - cache_page_release(desc); + goto error; + res = -EAGAIN; + if (desc->page->mapping != NULL) { + res = nfs_readdir_search_array(desc); + if (res == 0) { + nfsi->page_index = desc->page_index; + return 0; + } + } + unlock_page(desc->page); +error: + cache_page_release(desc); return res; } @@ -731,7 +756,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) desc->last_cookie = 0; } do { - res = find_cache_page(desc); + res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; } @@ -770,7 +795,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) desc->eof = true; kunmap(desc->page); - cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; @@ -816,13 +840,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) status = nfs_do_filldir(desc); + out_release: + nfs_readdir_clear_array(desc->page); + cache_page_release(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; - out_release: - cache_page_release(desc); - goto out; } /* The file offset position represents the dirent entry number. A @@ -887,6 +911,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; res = nfs_do_filldir(desc); + unlock_page(desc->page); + cache_page_release(desc); if (res < 0) break; } while (!desc->eof); @@ -1142,10 +1168,17 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, if (fhandle == NULL || fattr == NULL || IS_ERR(label)) goto out; - ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); if (ret < 0) { - if (ret == -ESTALE || ret == -ENOENT) + switch (ret) { + case -ESTALE: + case -ENOENT: ret = 0; + break; + case -ETIMEDOUT: + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + ret = 1; + } goto out; } ret = 0; @@ -1408,7 +1441,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in goto out; trace_nfs_lookup_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); if (error == -ENOENT) goto no_entry; if (error < 0) { @@ -1683,7 +1716,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, d_drop(dentry); if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL); if (error) goto out_error; } @@ -2312,11 +2345,11 @@ static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, s /* Found an entry, is our attribute cache valid? */ if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) break; + if (!retry) + break; err = -ECHILD; if (!may_block) goto out; - if (!retry) - goto out_zap; spin_unlock(&inode->i_lock); err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (err) @@ -2353,7 +2386,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre lh = rcu_dereference(nfsi->access_cache_entry_lru.prev); cache = list_entry(lh, struct nfs_access_entry, lru); if (lh == &nfsi->access_cache_entry_lru || - cred != cache->cred) + cred_fscmp(cred, cache->cred) != 0) cache = NULL; if (cache == NULL) goto out; @@ -2476,7 +2509,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; - int cache_mask; + int cache_mask = -1; int status; trace_nfs_access_enter(inode); @@ -2515,7 +2548,7 @@ out_cached: if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: - trace_nfs_access_exit(inode, status); + trace_nfs_access_exit(inode, mask, cache_mask, status); return status; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 040a50fd9bf3..b768a0b42e82 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -245,10 +245,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, data->ds_commit_index); /* verifier not set so always fail */ - if (verfp->committed < 0) + if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) return 1; - return nfs_direct_cmp_verf(verfp, &data->verf); + return nfs_direct_cmp_verf(verfp, data->res.verf); } /** @@ -824,7 +824,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = hdr->args.count; + hdr->good_bytes = hdr->args.offset + hdr->args.count - + hdr->io_start; } spin_unlock(&dreq->lock); } diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index aec769a500a1..89bd5581f317 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -93,7 +93,7 @@ static void nfs_dns_ent_init(struct cache_head *cnew, key = container_of(ckey, struct nfs_dns_ent, h); kfree(new->hostname); - new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL); + new->hostname = kmemdup_nul(key->hostname, key->namelen, GFP_KERNEL); if (new->hostname) { new->namelen = key->namelen; nfs_dns_ent_update(cnew, ckey); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8eb731d9be3e..f96367a2463e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -204,44 +204,39 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); static int nfs_file_fsync_commit(struct file *file, int datasync) { - struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); - int do_resend, status; - int ret = 0; + int ret; dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - status = nfs_commit_inode(inode, FLUSH_SYNC); - if (status == 0) - status = file_check_and_advance_wb_err(file); - if (status < 0) { - ret = status; - goto out; - } - do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - if (do_resend) - ret = -EAGAIN; -out: - return ret; + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + return ret; + return file_check_and_advance_wb_err(file); } int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int ret; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); + int ret; trace_nfs_fsync_enter(inode); - do { + for (;;) { ret = file_write_and_wait_range(file, start, end); if (ret != 0) break; ret = nfs_file_fsync_commit(file, datasync); - if (!ret) - ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; + ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; + if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) + break; /* * If nfs_file_fsync_commit detected a server reboot, then * resend all dirty pages that might have been covered by @@ -249,7 +244,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ start = 0; end = LLONG_MAX; - } while (ret == -EAGAIN); + } trace_nfs_fsync_exit(inode, ret); return ret; @@ -489,7 +484,19 @@ static int nfs_launder_page(struct page *page) static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { + unsigned long blocks; + long long isize; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); + struct inode *inode = file->f_mapping->host; + + spin_lock(&inode->i_lock); + blocks = inode->i_blocks; + isize = inode->i_size; + spin_unlock(&inode->i_lock); + if (blocks*512 < isize) { + pr_warn("swap activate: swapfile has holes\n"); + return -EINVAL; + } *span = sis->pages; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 5657b7f2611f..bb9148b83166 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1266,9 +1266,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task, static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, int idx, u64 offset, u64 length, - u32 status, int opnum, int error) + u32 *op_status, int opnum, int error) { struct nfs4_ff_layout_mirror *mirror; + u32 status = *op_status; int err; if (status == 0) { @@ -1286,10 +1287,10 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -ENOBUFS: case -EPIPE: case -EPERM: - status = NFS4ERR_NXIO; + *op_status = status = NFS4ERR_NXIO; break; case -EACCES: - status = NFS4ERR_ACCESS; + *op_status = status = NFS4ERR_ACCESS; break; default: return; @@ -1321,16 +1322,19 @@ static int ff_layout_read_done_cb(struct rpc_task *task, int new_idx = hdr->pgio_mirror_idx; int err; - trace_nfs4_pnfs_read(hdr, task->tk_status); - if (task->tk_status < 0) + if (task->tk_status < 0) { ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, hdr->args.offset, hdr->args.count, - hdr->res.op_status, OP_READ, + &hdr->res.op_status, OP_READ, task->tk_status); + trace_ff_layout_read_error(hdr); + } + err = ff_layout_async_handle_error(task, hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + trace_nfs4_pnfs_read(hdr, err); clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { @@ -1494,16 +1498,19 @@ static int ff_layout_write_done_cb(struct rpc_task *task, loff_t end_offs = 0; int err; - trace_nfs4_pnfs_write(hdr, task->tk_status); - if (task->tk_status < 0) + if (task->tk_status < 0) { ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, hdr->args.offset, hdr->args.count, - hdr->res.op_status, OP_WRITE, + &hdr->res.op_status, OP_WRITE, task->tk_status); + trace_ff_layout_write_error(hdr); + } + err = ff_layout_async_handle_error(task, hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + trace_nfs4_pnfs_write(hdr, err); clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { @@ -1537,15 +1544,18 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, { int err; - trace_nfs4_pnfs_commit_ds(data, task->tk_status); - if (task->tk_status < 0) + if (task->tk_status < 0) { ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, data->args.offset, data->args.count, - data->res.op_status, OP_COMMIT, + &data->res.op_status, OP_COMMIT, task->tk_status); + trace_ff_layout_commit_error(data); + } + err = ff_layout_async_handle_error(task, NULL, data->ds_clp, data->lseg, data->ds_commit_index); + trace_nfs4_pnfs_commit_ds(data, err); switch (err) { case -NFS4ERR_RESET_TO_PNFS: pnfs_generic_prepare_to_resend_writes(data); diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c new file mode 100644 index 000000000000..e1b938457ab9 --- /dev/null +++ b/fs/nfs/fs_context.c @@ -0,0 +1,1440 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/fs/nfs/fs_context.c + * + * Copyright (C) 1992 Rick Sladkey + * Conversion to new mount api Copyright (C) David Howells + * + * NFS mount handling. + * + * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com> + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include "nfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_MOUNT + +#if IS_ENABLED(CONFIG_NFS_V3) +#define NFS_DEFAULT_VERSION 3 +#else +#define NFS_DEFAULT_VERSION 2 +#endif + +#define NFS_MAX_CONNECTIONS 16 + +enum nfs_param { + Opt_ac, + Opt_acdirmax, + Opt_acdirmin, + Opt_acl, + Opt_acregmax, + Opt_acregmin, + Opt_actimeo, + Opt_addr, + Opt_bg, + Opt_bsize, + Opt_clientaddr, + Opt_cto, + Opt_fg, + Opt_fscache, + Opt_fscache_flag, + Opt_hard, + Opt_intr, + Opt_local_lock, + Opt_lock, + Opt_lookupcache, + Opt_migration, + Opt_minorversion, + Opt_mountaddr, + Opt_mounthost, + Opt_mountport, + Opt_mountproto, + Opt_mountvers, + Opt_namelen, + Opt_nconnect, + Opt_port, + Opt_posix, + Opt_proto, + Opt_rdirplus, + Opt_rdma, + Opt_resvport, + Opt_retrans, + Opt_retry, + Opt_rsize, + Opt_sec, + Opt_sharecache, + Opt_sloppy, + Opt_soft, + Opt_softerr, + Opt_softreval, + Opt_source, + Opt_tcp, + Opt_timeo, + Opt_udp, + Opt_v, + Opt_vers, + Opt_wsize, +}; + +enum { + Opt_local_lock_all, + Opt_local_lock_flock, + Opt_local_lock_none, + Opt_local_lock_posix, +}; + +static const struct constant_table nfs_param_enums_local_lock[] = { + { "all", Opt_local_lock_all }, + { "flock", Opt_local_lock_flock }, + { "none", Opt_local_lock_none }, + {} +}; + +enum { + Opt_lookupcache_all, + Opt_lookupcache_none, + Opt_lookupcache_positive, +}; + +static const struct constant_table nfs_param_enums_lookupcache[] = { + { "all", Opt_lookupcache_all }, + { "none", Opt_lookupcache_none }, + { "pos", Opt_lookupcache_positive }, + { "positive", Opt_lookupcache_positive }, + {} +}; + +static const struct fs_parameter_spec nfs_fs_parameters[] = { + fsparam_flag_no("ac", Opt_ac), + fsparam_u32 ("acdirmax", Opt_acdirmax), + fsparam_u32 ("acdirmin", Opt_acdirmin), + fsparam_flag_no("acl", Opt_acl), + fsparam_u32 ("acregmax", Opt_acregmax), + fsparam_u32 ("acregmin", Opt_acregmin), + fsparam_u32 ("actimeo", Opt_actimeo), + fsparam_string("addr", Opt_addr), + fsparam_flag ("bg", Opt_bg), + fsparam_u32 ("bsize", Opt_bsize), + fsparam_string("clientaddr", Opt_clientaddr), + fsparam_flag_no("cto", Opt_cto), + fsparam_flag ("fg", Opt_fg), + fsparam_flag_no("fsc", Opt_fscache_flag), + fsparam_string("fsc", Opt_fscache), + fsparam_flag ("hard", Opt_hard), + __fsparam(NULL, "intr", Opt_intr, + fs_param_neg_with_no|fs_param_deprecated, NULL), + fsparam_enum ("local_lock", Opt_local_lock, nfs_param_enums_local_lock), + fsparam_flag_no("lock", Opt_lock), + fsparam_enum ("lookupcache", Opt_lookupcache, nfs_param_enums_lookupcache), + fsparam_flag_no("migration", Opt_migration), + fsparam_u32 ("minorversion", Opt_minorversion), + fsparam_string("mountaddr", Opt_mountaddr), + fsparam_string("mounthost", Opt_mounthost), + fsparam_u32 ("mountport", Opt_mountport), + fsparam_string("mountproto", Opt_mountproto), + fsparam_u32 ("mountvers", Opt_mountvers), + fsparam_u32 ("namlen", Opt_namelen), + fsparam_u32 ("nconnect", Opt_nconnect), + fsparam_string("nfsvers", Opt_vers), + fsparam_u32 ("port", Opt_port), + fsparam_flag_no("posix", Opt_posix), + fsparam_string("proto", Opt_proto), + fsparam_flag_no("rdirplus", Opt_rdirplus), + fsparam_flag ("rdma", Opt_rdma), + fsparam_flag_no("resvport", Opt_resvport), + fsparam_u32 ("retrans", Opt_retrans), + fsparam_string("retry", Opt_retry), + fsparam_u32 ("rsize", Opt_rsize), + fsparam_string("sec", Opt_sec), + fsparam_flag_no("sharecache", Opt_sharecache), + fsparam_flag ("sloppy", Opt_sloppy), + fsparam_flag ("soft", Opt_soft), + fsparam_flag ("softerr", Opt_softerr), + fsparam_flag ("softreval", Opt_softreval), + fsparam_string("source", Opt_source), + fsparam_flag ("tcp", Opt_tcp), + fsparam_u32 ("timeo", Opt_timeo), + fsparam_flag ("udp", Opt_udp), + fsparam_flag ("v2", Opt_v), + fsparam_flag ("v3", Opt_v), + fsparam_flag ("v4", Opt_v), + fsparam_flag ("v4.0", Opt_v), + fsparam_flag ("v4.1", Opt_v), + fsparam_flag ("v4.2", Opt_v), + fsparam_string("vers", Opt_vers), + fsparam_u32 ("wsize", Opt_wsize), + {} +}; + +enum { + Opt_vers_2, + Opt_vers_3, + Opt_vers_4, + Opt_vers_4_0, + Opt_vers_4_1, + Opt_vers_4_2, +}; + +static const struct constant_table nfs_vers_tokens[] = { + { "2", Opt_vers_2 }, + { "3", Opt_vers_3 }, + { "4", Opt_vers_4 }, + { "4.0", Opt_vers_4_0 }, + { "4.1", Opt_vers_4_1 }, + { "4.2", Opt_vers_4_2 }, +}; + +enum { + Opt_xprt_rdma, + Opt_xprt_rdma6, + Opt_xprt_tcp, + Opt_xprt_tcp6, + Opt_xprt_udp, + Opt_xprt_udp6, + nr__Opt_xprt +}; + +static const struct constant_table nfs_xprt_protocol_tokens[nr__Opt_xprt] = { + { "rdma", Opt_xprt_rdma }, + { "rdma6", Opt_xprt_rdma6 }, + { "tcp", Opt_xprt_tcp }, + { "tcp6", Opt_xprt_tcp6 }, + { "udp", Opt_xprt_udp }, + { "udp6", Opt_xprt_udp6 }, +}; + +enum { + Opt_sec_krb5, + Opt_sec_krb5i, + Opt_sec_krb5p, + Opt_sec_lkey, + Opt_sec_lkeyi, + Opt_sec_lkeyp, + Opt_sec_none, + Opt_sec_spkm, + Opt_sec_spkmi, + Opt_sec_spkmp, + Opt_sec_sys, + nr__Opt_sec +}; + +static const struct constant_table nfs_secflavor_tokens[] = { + { "krb5", Opt_sec_krb5 }, + { "krb5i", Opt_sec_krb5i }, + { "krb5p", Opt_sec_krb5p }, + { "lkey", Opt_sec_lkey }, + { "lkeyi", Opt_sec_lkeyi }, + { "lkeyp", Opt_sec_lkeyp }, + { "none", Opt_sec_none }, + { "null", Opt_sec_none }, + { "spkm3", Opt_sec_spkm }, + { "spkm3i", Opt_sec_spkmi }, + { "spkm3p", Opt_sec_spkmp }, + { "sys", Opt_sec_sys }, +}; + +/* + * Sanity-check a server address provided by the mount command. + * + * Address family must be initialized, and address must not be + * the ANY address for that family. + */ +static int nfs_verify_server_address(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: { + struct sockaddr_in *sa = (struct sockaddr_in *)addr; + return sa->sin_addr.s_addr != htonl(INADDR_ANY); + } + case AF_INET6: { + struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; + return !ipv6_addr_any(sa); + } + } + + dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); + return 0; +} + +/* + * Sanity check the NFS transport protocol. + * + */ +static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx) +{ + switch (ctx->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + break; + default: + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * For text based NFSv2/v3 mounts, the mount protocol transport default + * settings should depend upon the specified NFS transport. + */ +static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx) +{ + nfs_validate_transport_protocol(ctx); + + if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP || + ctx->mount_server.protocol == XPRT_TRANSPORT_TCP) + return; + switch (ctx->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * Add 'flavor' to 'auth_info' if not already present. + * Returns true if 'flavor' ends up in the list, false otherwise + */ +static int nfs_auth_info_add(struct fs_context *fc, + struct nfs_auth_info *auth_info, + rpc_authflavor_t flavor) +{ + unsigned int i; + unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); + + /* make sure this flavor isn't already in the list */ + for (i = 0; i < auth_info->flavor_len; i++) { + if (flavor == auth_info->flavors[i]) + return 0; + } + + if (auth_info->flavor_len + 1 >= max_flavor_len) + return nfs_invalf(fc, "NFS: too many sec= flavors"); + + auth_info->flavors[auth_info->flavor_len++] = flavor; + return 0; +} + +/* + * Parse the value of the 'sec=' option. + */ +static int nfs_parse_security_flavors(struct fs_context *fc, + struct fs_parameter *param) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + rpc_authflavor_t pseudoflavor; + char *string = param->string, *p; + int ret; + + dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string); + + while ((p = strsep(&string, ":")) != NULL) { + if (!*p) + continue; + switch (lookup_constant(nfs_secflavor_tokens, p, -1)) { + case Opt_sec_none: + pseudoflavor = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + pseudoflavor = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + pseudoflavor = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + pseudoflavor = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + pseudoflavor = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + pseudoflavor = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + pseudoflavor = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + pseudoflavor = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + pseudoflavor = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + pseudoflavor = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + pseudoflavor = RPC_AUTH_GSS_SPKMP; + break; + default: + return nfs_invalf(fc, "NFS: sec=%s option not recognized", p); + } + + ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor); + if (ret < 0) + return ret; + } + + return 0; +} + +static int nfs_parse_version_string(struct fs_context *fc, + const char *string) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + + ctx->flags &= ~NFS_MOUNT_VER3; + switch (lookup_constant(nfs_vers_tokens, string, -1)) { + case Opt_vers_2: + ctx->version = 2; + break; + case Opt_vers_3: + ctx->flags |= NFS_MOUNT_VER3; + ctx->version = 3; + break; + case Opt_vers_4: + /* Backward compatibility option. In future, + * the mount program should always supply + * a NFSv4 minor version number. + */ + ctx->version = 4; + break; + case Opt_vers_4_0: + ctx->version = 4; + ctx->minorversion = 0; + break; + case Opt_vers_4_1: + ctx->version = 4; + ctx->minorversion = 1; + break; + case Opt_vers_4_2: + ctx->version = 4; + ctx->minorversion = 2; + break; + default: + return nfs_invalf(fc, "NFS: Unsupported NFS version"); + } + return 0; +} + +/* + * Parse a single mount parameter. + */ +static int nfs_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + struct fs_parse_result result; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + unsigned short protofamily, mountfamily; + unsigned int len; + int ret, opt; + + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key); + + opt = fs_parse(fc, nfs_fs_parameters, param, &result); + if (opt < 0) + return ctx->sloppy ? 1 : opt; + + switch (opt) { + case Opt_source: + if (fc->source) + return nfs_invalf(fc, "NFS: Multiple sources not supported"); + fc->source = param->string; + param->string = NULL; + break; + + /* + * boolean options: foo/nofoo + */ + case Opt_soft: + ctx->flags |= NFS_MOUNT_SOFT; + ctx->flags &= ~NFS_MOUNT_SOFTERR; + break; + case Opt_softerr: + ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL; + ctx->flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_hard: + ctx->flags &= ~(NFS_MOUNT_SOFT | + NFS_MOUNT_SOFTERR | + NFS_MOUNT_SOFTREVAL); + break; + case Opt_softreval: + if (result.negated) + ctx->flags &= ~NFS_MOUNT_SOFTREVAL; + else + ctx->flags &= NFS_MOUNT_SOFTREVAL; + break; + case Opt_posix: + if (result.negated) + ctx->flags &= ~NFS_MOUNT_POSIX; + else + ctx->flags |= NFS_MOUNT_POSIX; + break; + case Opt_cto: + if (result.negated) + ctx->flags |= NFS_MOUNT_NOCTO; + else + ctx->flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_ac: + if (result.negated) + ctx->flags |= NFS_MOUNT_NOAC; + else + ctx->flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_lock: + if (result.negated) { + ctx->flags |= NFS_MOUNT_NONLM; + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } else { + ctx->flags &= ~NFS_MOUNT_NONLM; + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } + break; + case Opt_udp: + ctx->flags &= ~NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_tcp: + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_rdma: + ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ + ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(param->key); + break; + case Opt_acl: + if (result.negated) + ctx->flags |= NFS_MOUNT_NOACL; + else + ctx->flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_rdirplus: + if (result.negated) + ctx->flags |= NFS_MOUNT_NORDIRPLUS; + else + ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + break; + case Opt_sharecache: + if (result.negated) + ctx->flags |= NFS_MOUNT_UNSHARED; + else + ctx->flags &= ~NFS_MOUNT_UNSHARED; + break; + case Opt_resvport: + if (result.negated) + ctx->flags |= NFS_MOUNT_NORESVPORT; + else + ctx->flags &= ~NFS_MOUNT_NORESVPORT; + break; + case Opt_fscache_flag: + if (result.negated) + ctx->options &= ~NFS_OPTION_FSCACHE; + else + ctx->options |= NFS_OPTION_FSCACHE; + kfree(ctx->fscache_uniq); + ctx->fscache_uniq = NULL; + break; + case Opt_fscache: + ctx->options |= NFS_OPTION_FSCACHE; + kfree(ctx->fscache_uniq); + ctx->fscache_uniq = param->string; + param->string = NULL; + break; + case Opt_migration: + if (result.negated) + ctx->options &= ~NFS_OPTION_MIGRATION; + else + ctx->options |= NFS_OPTION_MIGRATION; + break; + + /* + * options that take numeric values + */ + case Opt_port: + if (result.uint_32 > USHRT_MAX) + goto out_of_bounds; + ctx->nfs_server.port = result.uint_32; + break; + case Opt_rsize: + ctx->rsize = result.uint_32; + break; + case Opt_wsize: + ctx->wsize = result.uint_32; + break; + case Opt_bsize: + ctx->bsize = result.uint_32; + break; + case Opt_timeo: + if (result.uint_32 < 1 || result.uint_32 > INT_MAX) + goto out_of_bounds; + ctx->timeo = result.uint_32; + break; + case Opt_retrans: + if (result.uint_32 > INT_MAX) + goto out_of_bounds; + ctx->retrans = result.uint_32; + break; + case Opt_acregmin: + ctx->acregmin = result.uint_32; + break; + case Opt_acregmax: + ctx->acregmax = result.uint_32; + break; + case Opt_acdirmin: + ctx->acdirmin = result.uint_32; + break; + case Opt_acdirmax: + ctx->acdirmax = result.uint_32; + break; + case Opt_actimeo: + ctx->acregmin = result.uint_32; + ctx->acregmax = result.uint_32; + ctx->acdirmin = result.uint_32; + ctx->acdirmax = result.uint_32; + break; + case Opt_namelen: + ctx->namlen = result.uint_32; + break; + case Opt_mountport: + if (result.uint_32 > USHRT_MAX) + goto out_of_bounds; + ctx->mount_server.port = result.uint_32; + break; + case Opt_mountvers: + if (result.uint_32 < NFS_MNT_VERSION || + result.uint_32 > NFS_MNT3_VERSION) + goto out_of_bounds; + ctx->mount_server.version = result.uint_32; + break; + case Opt_minorversion: + if (result.uint_32 > NFS4_MAX_MINOR_VERSION) + goto out_of_bounds; + ctx->minorversion = result.uint_32; + break; + + /* + * options that take text values + */ + case Opt_v: + ret = nfs_parse_version_string(fc, param->key + 1); + if (ret < 0) + return ret; + break; + case Opt_vers: + ret = nfs_parse_version_string(fc, param->string); + if (ret < 0) + return ret; + break; + case Opt_sec: + ret = nfs_parse_security_flavors(fc, param); + if (ret < 0) + return ret; + break; + + case Opt_proto: + protofamily = AF_INET; + switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { + case Opt_xprt_udp6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_udp: + ctx->flags &= ~NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_xprt_tcp6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_tcp: + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_xprt_rdma6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_rdma: + /* vector side protocols to TCP */ + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(param->string); + break; + default: + return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); + } + + ctx->protofamily = protofamily; + break; + + case Opt_mountproto: + mountfamily = AF_INET; + switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { + case Opt_xprt_udp6: + mountfamily = AF_INET6; + /* fall through */ + case Opt_xprt_udp: + ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_xprt_tcp6: + mountfamily = AF_INET6; + /* fall through */ + case Opt_xprt_tcp: + ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_xprt_rdma: /* not used for side protocols */ + default: + return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); + } + ctx->mountfamily = mountfamily; + break; + + case Opt_addr: + len = rpc_pton(fc->net_ns, param->string, param->size, + &ctx->nfs_server.address, + sizeof(ctx->nfs_server._address)); + if (len == 0) + goto out_invalid_address; + ctx->nfs_server.addrlen = len; + break; + case Opt_clientaddr: + kfree(ctx->client_address); + ctx->client_address = param->string; + param->string = NULL; + break; + case Opt_mounthost: + kfree(ctx->mount_server.hostname); + ctx->mount_server.hostname = param->string; + param->string = NULL; + break; + case Opt_mountaddr: + len = rpc_pton(fc->net_ns, param->string, param->size, + &ctx->mount_server.address, + sizeof(ctx->mount_server._address)); + if (len == 0) + goto out_invalid_address; + ctx->mount_server.addrlen = len; + break; + case Opt_nconnect: + if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS) + goto out_of_bounds; + ctx->nfs_server.nconnect = result.uint_32; + break; + case Opt_lookupcache: + switch (result.uint_32) { + case Opt_lookupcache_all: + ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); + break; + case Opt_lookupcache_positive: + ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; + ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; + break; + case Opt_lookupcache_none: + ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; + break; + default: + goto out_invalid_value; + } + break; + case Opt_local_lock: + switch (result.uint_32) { + case Opt_local_lock_all: + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_local_lock_flock: + ctx->flags |= NFS_MOUNT_LOCAL_FLOCK; + break; + case Opt_local_lock_posix: + ctx->flags |= NFS_MOUNT_LOCAL_FCNTL; + break; + case Opt_local_lock_none: + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + default: + goto out_invalid_value; + } + break; + + /* + * Special options + */ + case Opt_sloppy: + ctx->sloppy = true; + dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); + break; + } + + return 0; + +out_invalid_value: + return nfs_invalf(fc, "NFS: Bad mount option value specified"); +out_invalid_address: + return nfs_invalf(fc, "NFS: Bad IP address specified"); +out_of_bounds: + return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); +} + +/* + * Split fc->source into "hostname:export_path". + * + * The leftmost colon demarks the split between the server's hostname + * and the export path. If the hostname starts with a left square + * bracket, then it may contain colons. + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_source(struct fs_context *fc, + size_t maxnamlen, size_t maxpathlen) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + const char *dev_name = fc->source; + size_t len; + const char *end; + + if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + + /* Is the host name protected with square brakcets? */ + if (*dev_name == '[') { + end = strchr(++dev_name, ']'); + if (end == NULL || end[1] != ':') + goto out_bad_devname; + + len = end - dev_name; + end++; + } else { + const char *comma; + + end = strchr(dev_name, ':'); + if (end == NULL) + goto out_bad_devname; + len = end - dev_name; + + /* kill possible hostname list: not supported */ + comma = memchr(dev_name, ',', len); + if (comma) + len = comma - dev_name; + } + + if (len > maxnamlen) + goto out_hostname; + + /* N.B. caller will free nfs_server.hostname in all cases */ + ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL); + if (!ctx->nfs_server.hostname) + goto out_nomem; + len = strlen(++end); + if (len > maxpathlen) + goto out_path; + ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL); + if (!ctx->nfs_server.export_path) + goto out_nomem; + + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path); + return 0; + +out_bad_devname: + return nfs_invalf(fc, "NFS: device name not in host:path format"); +out_nomem: + nfs_errorf(fc, "NFS: not enough memory to parse device name"); + return -ENOMEM; +out_hostname: + nfs_errorf(fc, "NFS: server hostname too long"); + return -ENAMETOOLONG; +out_path: + nfs_errorf(fc, "NFS: export pathname too long"); + return -ENAMETOOLONG; +} + +static inline bool is_remount_fc(struct fs_context *fc) +{ + return fc->root != NULL; +} + +/* + * Parse monolithic NFS2/NFS3 mount data + * - fills in the mount root filehandle + * + * For option strings, user space handles the following behaviors: + * + * + DNS: mapping server host name to IP address ("addr=" option) + * + * + failure mode: how to behave if a mount request can't be handled + * immediately ("fg/bg" option) + * + * + retry: how often to retry a mount request ("retry=" option) + * + * + breaking back: trying proto=udp after proto=tcp, v2 after v3, + * mountproto=tcp after mountproto=udp, and so on + */ +static int nfs23_parse_monolithic(struct fs_context *fc, + struct nfs_mount_data *data) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_fh *mntfh = ctx->mntfh; + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; + + if (data == NULL) + goto out_no_data; + + ctx->version = NFS_DEFAULT_VERSION; + switch (data->version) { + case 1: + data->namlen = 0; /* fall through */ + case 2: + data->bsize = 0; /* fall through */ + case 3: + if (data->flags & NFS_MOUNT_VER3) + goto out_no_v3; + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + /* Turn off security negotiation */ + extra_flags |= NFS_MOUNT_SECFLAVOUR; + /* fall through */ + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) + goto out_no_sec; + /* fall through */ + case 5: + memset(data->context, 0, sizeof(data->context)); + /* fall through */ + case 6: + if (data->flags & NFS_MOUNT_VER3) { + if (data->root.size > NFS3_FHSIZE || data->root.size == 0) + goto out_invalid_fh; + mntfh->size = data->root.size; + ctx->version = 3; + } else { + mntfh->size = NFS2_FHSIZE; + ctx->version = 2; + } + + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); + + /* + * Translate to nfs_fs_context, which nfs_fill_super + * can deal with. + */ + ctx->flags = data->flags & NFS_MOUNT_FLAGMASK; + ctx->flags |= extra_flags; + ctx->rsize = data->rsize; + ctx->wsize = data->wsize; + ctx->timeo = data->timeo; + ctx->retrans = data->retrans; + ctx->acregmin = data->acregmin; + ctx->acregmax = data->acregmax; + ctx->acdirmin = data->acdirmin; + ctx->acdirmax = data->acdirmax; + ctx->need_mount = false; + + memcpy(sap, &data->addr, sizeof(data->addr)); + ctx->nfs_server.addrlen = sizeof(data->addr); + ctx->nfs_server.port = ntohs(data->addr.sin_port); + if (sap->sa_family != AF_INET || + !nfs_verify_server_address(sap)) + goto out_no_address; + + if (!(data->flags & NFS_MOUNT_TCP)) + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; + /* N.B. caller will free nfs_server.hostname in all cases */ + ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); + if (!ctx->nfs_server.hostname) + goto out_nomem; + + ctx->namlen = data->namlen; + ctx->bsize = data->bsize; + + if (data->flags & NFS_MOUNT_SECFLAVOUR) + ctx->selected_flavor = data->pseudoflavor; + else + ctx->selected_flavor = RPC_AUTH_UNIX; + + if (!(data->flags & NFS_MOUNT_NONLM)) + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); + else + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); + + /* + * The legacy version 6 binary mount data from userspace has a + * field used only to transport selinux information into the + * the kernel. To continue to support that functionality we + * have a touch of selinux knowledge here in the NFS code. The + * userspace code converted context=blah to just blah so we are + * converting back to the full string selinux understands. + */ + if (data->context[0]){ +#ifdef CONFIG_SECURITY_SELINUX + int ret; + + data->context[NFS_MAX_CONTEXT_LEN] = '\0'; + ret = vfs_parse_fs_string(fc, "context", + data->context, strlen(data->context)); + if (ret < 0) + return ret; +#else + return -EINVAL; +#endif + } + + break; + default: + goto generic; + } + + ctx->skip_reconfig_option_check = true; + return 0; + +generic: + return generic_parse_monolithic(fc, data); + +out_no_data: + if (is_remount_fc(fc)) { + ctx->skip_reconfig_option_check = true; + return 0; + } + return nfs_invalf(fc, "NFS: mount program didn't pass any mount data"); + +out_no_v3: + return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3"); + +out_no_sec: + return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS"); + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to handle mount options"); + return -ENOMEM; + +out_no_address: + return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); + +out_invalid_fh: + return nfs_invalf(fc, "NFS: invalid root filehandle"); +} + +#if IS_ENABLED(CONFIG_NFS_V4) +/* + * Validate NFSv4 mount options + */ +static int nfs4_parse_monolithic(struct fs_context *fc, + struct nfs4_mount_data *data) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + char *c; + + if (data == NULL) + goto out_no_data; + + ctx->version = 4; + + switch (data->version) { + case 1: + if (data->host_addrlen > sizeof(ctx->nfs_server.address)) + goto out_no_address; + if (data->host_addrlen == 0) + goto out_no_address; + ctx->nfs_server.addrlen = data->host_addrlen; + if (copy_from_user(sap, data->host_addr, data->host_addrlen)) + return -EFAULT; + if (!nfs_verify_server_address(sap)) + goto out_no_address; + ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); + + if (data->auth_flavourlen) { + rpc_authflavor_t pseudoflavor; + if (data->auth_flavourlen > 1) + goto out_inval_auth; + if (copy_from_user(&pseudoflavor, + data->auth_flavours, + sizeof(pseudoflavor))) + return -EFAULT; + ctx->selected_flavor = pseudoflavor; + } else + ctx->selected_flavor = RPC_AUTH_UNIX; + + c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + ctx->nfs_server.hostname = c; + + c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + ctx->nfs_server.export_path = c; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); + + c = strndup_user(data->client_addr.data, 16); + if (IS_ERR(c)) + return PTR_ERR(c); + ctx->client_address = c; + + /* + * Translate to nfs_fs_context, which nfs_fill_super + * can deal with. + */ + + ctx->flags = data->flags & NFS4_MOUNT_FLAGMASK; + ctx->rsize = data->rsize; + ctx->wsize = data->wsize; + ctx->timeo = data->timeo; + ctx->retrans = data->retrans; + ctx->acregmin = data->acregmin; + ctx->acregmax = data->acregmax; + ctx->acdirmin = data->acdirmin; + ctx->acdirmax = data->acdirmax; + ctx->nfs_server.protocol = data->proto; + nfs_validate_transport_protocol(ctx); + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; + + break; + default: + goto generic; + } + + ctx->skip_reconfig_option_check = true; + return 0; + +generic: + return generic_parse_monolithic(fc, data); + +out_no_data: + if (is_remount_fc(fc)) { + ctx->skip_reconfig_option_check = true; + return 0; + } + return nfs_invalf(fc, "NFS4: mount program didn't pass any mount data"); + +out_inval_auth: + return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d", + data->auth_flavourlen); + +out_no_address: + return nfs_invalf(fc, "NFS4: mount program didn't pass remote address"); + +out_invalid_transport_udp: + return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp"); +} +#endif + +/* + * Parse a monolithic block of data from sys_mount(). + */ +static int nfs_fs_context_parse_monolithic(struct fs_context *fc, + void *data) +{ + if (fc->fs_type == &nfs_fs_type) + return nfs23_parse_monolithic(fc, data); + +#if IS_ENABLED(CONFIG_NFS_V4) + if (fc->fs_type == &nfs4_fs_type) + return nfs4_parse_monolithic(fc, data); +#endif + + return nfs_invalf(fc, "NFS: Unsupported monolithic data version"); +} + +/* + * Validate the preparsed information in the config. + */ +static int nfs_fs_context_validate(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_subversion *nfs_mod; + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + int max_namelen = PAGE_SIZE; + int max_pathlen = NFS_MAXPATHLEN; + int port = 0; + int ret; + + if (!fc->source) + goto out_no_device_name; + + /* Check for sanity first. */ + if (ctx->minorversion && ctx->version != 4) + goto out_minorversion_mismatch; + + if (ctx->options & NFS_OPTION_MIGRATION && + (ctx->version != 4 || ctx->minorversion != 0)) + goto out_migration_misuse; + + /* Verify that any proto=/mountproto= options match the address + * families in the addr=/mountaddr= options. + */ + if (ctx->protofamily != AF_UNSPEC && + ctx->protofamily != ctx->nfs_server.address.sa_family) + goto out_proto_mismatch; + + if (ctx->mountfamily != AF_UNSPEC) { + if (ctx->mount_server.addrlen) { + if (ctx->mountfamily != ctx->mount_server.address.sa_family) + goto out_mountproto_mismatch; + } else { + if (ctx->mountfamily != ctx->nfs_server.address.sa_family) + goto out_mountproto_mismatch; + } + } + + if (!nfs_verify_server_address(sap)) + goto out_no_address; + + if (ctx->version == 4) { + if (IS_ENABLED(CONFIG_NFS_V4)) { + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + else + port = NFS_PORT; + max_namelen = NFS4_MAXNAMLEN; + max_pathlen = NFS4_MAXPATHLEN; + nfs_validate_transport_protocol(ctx); + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; + ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL | + NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + } else { + goto out_v4_not_compiled; + } + } else { + nfs_set_mount_transport_protocol(ctx); +#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; +#endif + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + } + + nfs_set_port(sap, &ctx->nfs_server.port, port); + + ret = nfs_parse_source(fc, max_namelen, max_pathlen); + if (ret < 0) + return ret; + + /* Load the NFS protocol module if we haven't done so yet */ + if (!ctx->nfs_mod) { + nfs_mod = get_nfs_version(ctx->version); + if (IS_ERR(nfs_mod)) { + ret = PTR_ERR(nfs_mod); + goto out_version_unavailable; + } + ctx->nfs_mod = nfs_mod; + } + return 0; + +out_no_device_name: + return nfs_invalf(fc, "NFS: Device name not specified"); +out_v4_not_compiled: + nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel"); + return -EPROTONOSUPPORT; +out_invalid_transport_udp: + return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp"); +out_no_address: + return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); +out_mountproto_mismatch: + return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option"); +out_proto_mismatch: + return nfs_invalf(fc, "NFS: Server address does not match proto= option"); +out_minorversion_mismatch: + return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u", + ctx->version, ctx->minorversion); +out_migration_misuse: + return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version"); +out_version_unavailable: + nfs_errorf(fc, "NFS: Version unavailable"); + return ret; +} + +/* + * Create an NFS superblock by the appropriate method. + */ +static int nfs_get_tree(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err = nfs_fs_context_validate(fc); + + if (err) + return err; + if (!ctx->internal) + return ctx->nfs_mod->rpc_ops->try_get_tree(fc); + else + return nfs_get_tree_common(fc); +} + +/* + * Handle duplication of a configuration. The caller copied *src into *sc, but + * it can't deal with resource pointers in the filesystem context, so we have + * to do that. We need to clear pointers, copy data or get extra refs as + * appropriate. + */ +static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) +{ + struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx; + + ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->mntfh = nfs_alloc_fhandle(); + if (!ctx->mntfh) { + kfree(ctx); + return -ENOMEM; + } + nfs_copy_fh(ctx->mntfh, src->mntfh); + + __module_get(ctx->nfs_mod->owner); + ctx->client_address = NULL; + ctx->mount_server.hostname = NULL; + ctx->nfs_server.export_path = NULL; + ctx->nfs_server.hostname = NULL; + ctx->fscache_uniq = NULL; + ctx->clone_data.fattr = NULL; + fc->fs_private = ctx; + return 0; +} + +static void nfs_fs_context_free(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + + if (ctx) { + if (ctx->server) + nfs_free_server(ctx->server); + if (ctx->nfs_mod) + put_nfs_version(ctx->nfs_mod); + kfree(ctx->client_address); + kfree(ctx->mount_server.hostname); + kfree(ctx->nfs_server.export_path); + kfree(ctx->nfs_server.hostname); + kfree(ctx->fscache_uniq); + nfs_free_fhandle(ctx->mntfh); + nfs_free_fattr(ctx->clone_data.fattr); + kfree(ctx); + } +} + +static const struct fs_context_operations nfs_fs_context_ops = { + .free = nfs_fs_context_free, + .dup = nfs_fs_context_dup, + .parse_param = nfs_fs_context_parse_param, + .parse_monolithic = nfs_fs_context_parse_monolithic, + .get_tree = nfs_get_tree, + .reconfigure = nfs_reconfigure, +}; + +/* + * Prepare superblock configuration. We use the namespaces attached to the + * context. This may be the current process's namespaces, or it may be a + * container's namespaces. + */ +static int nfs_init_fs_context(struct fs_context *fc) +{ + struct nfs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL); + if (unlikely(!ctx)) + return -ENOMEM; + + ctx->mntfh = nfs_alloc_fhandle(); + if (unlikely(!ctx->mntfh)) { + kfree(ctx); + return -ENOMEM; + } + + ctx->protofamily = AF_UNSPEC; + ctx->mountfamily = AF_UNSPEC; + ctx->mount_server.port = NFS_UNSPEC_PORT; + + if (fc->root) { + /* reconfigure, start with the current config */ + struct nfs_server *nfss = fc->root->d_sb->s_fs_info; + struct net *net = nfss->nfs_client->cl_net; + + ctx->flags = nfss->flags; + ctx->rsize = nfss->rsize; + ctx->wsize = nfss->wsize; + ctx->retrans = nfss->client->cl_timeout->to_retries; + ctx->selected_flavor = nfss->client->cl_auth->au_flavor; + ctx->acregmin = nfss->acregmin / HZ; + ctx->acregmax = nfss->acregmax / HZ; + ctx->acdirmin = nfss->acdirmin / HZ; + ctx->acdirmax = nfss->acdirmax / HZ; + ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; + ctx->nfs_server.port = nfss->port; + ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + ctx->version = nfss->nfs_client->rpc_ops->version; + ctx->minorversion = nfss->nfs_client->cl_minorversion; + + memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr, + ctx->nfs_server.addrlen); + + if (fc->net_ns != net) { + put_net(fc->net_ns); + fc->net_ns = get_net(net); + } + + ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod; + __module_get(ctx->nfs_mod->owner); + } else { + /* defaults */ + ctx->timeo = NFS_UNSPEC_TIMEO; + ctx->retrans = NFS_UNSPEC_RETRANS; + ctx->acregmin = NFS_DEF_ACREGMIN; + ctx->acregmax = NFS_DEF_ACREGMAX; + ctx->acdirmin = NFS_DEF_ACDIRMIN; + ctx->acdirmax = NFS_DEF_ACDIRMAX; + ctx->nfs_server.port = NFS_UNSPEC_PORT; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; + ctx->minorversion = 0; + ctx->need_mount = true; + } + fc->fs_private = ctx; + fc->ops = &nfs_fs_context_ops; + return 0; +} + +struct file_system_type nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .init_fs_context = nfs_init_fs_context, + .parameters = nfs_fs_parameters, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, +}; +MODULE_ALIAS_FS("nfs"); +EXPORT_SYMBOL_GPL(nfs_fs_type); + +#if IS_ENABLED(CONFIG_NFS_V4) +struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .init_fs_context = nfs_init_fs_context, + .parameters = nfs_fs_parameters, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, +}; +MODULE_ALIAS_FS("nfs4"); +MODULE_ALIAS("nfs4"); +EXPORT_SYMBOL_GPL(nfs4_fs_type); +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 7def925d3af5..52270bfac120 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -128,7 +128,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int return; key->nfs_client = nfss->nfs_client; - key->key.super.s_flags = sb->s_flags & NFS_MS_MASK; + key->key.super.s_flags = sb->s_flags & NFS_SB_MASK; key->key.nfs_server.flags = nfss->flags; key->key.nfs_server.rsize = nfss->rsize; key->key.nfs_server.wsize = nfss->wsize; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 878c4c5982d9..b012c2668a1f 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -64,66 +64,71 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i /* * get an NFS2/NFS3 root dentry from the root filehandle */ -struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, - const char *devname) +int nfs_get_root(struct super_block *s, struct fs_context *fc) { - struct nfs_server *server = NFS_SB(sb); + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_server *server = NFS_SB(s); struct nfs_fsinfo fsinfo; - struct dentry *ret; + struct dentry *root; struct inode *inode; - void *name = kstrdup(devname, GFP_KERNEL); - int error; + char *name; + int error = -ENOMEM; + name = kstrdup(fc->source, GFP_KERNEL); if (!name) - return ERR_PTR(-ENOMEM); + goto out; /* get the actual root for this mount */ fsinfo.fattr = nfs_alloc_fattr(); - if (fsinfo.fattr == NULL) { - kfree(name); - return ERR_PTR(-ENOMEM); - } + if (fsinfo.fattr == NULL) + goto out_name; - error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo); if (error < 0) { dprintk("nfs_get_root: getattr error = %d\n", -error); - ret = ERR_PTR(error); - goto out; + nfs_errorf(fc, "NFS: Couldn't getattr on root"); + goto out_fattr; } - inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL); + inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); - ret = ERR_CAST(inode); - goto out; + error = PTR_ERR(inode); + nfs_errorf(fc, "NFS: Couldn't get root inode"); + goto out_fattr; } - error = nfs_superblock_set_dummy_root(sb, inode); - if (error != 0) { - ret = ERR_PTR(error); - goto out; - } + error = nfs_superblock_set_dummy_root(s, inode); + if (error != 0) + goto out_fattr; /* root dentries normally start off anonymous and get spliced in later * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - ret = d_obtain_root(inode); - if (IS_ERR(ret)) { + root = d_obtain_root(inode); + if (IS_ERR(root)) { dprintk("nfs_get_root: get root dentry failed\n"); - goto out; + error = PTR_ERR(root); + nfs_errorf(fc, "NFS: Couldn't get root dentry"); + goto out_fattr; } - security_d_instantiate(ret, inode); - spin_lock(&ret->d_lock); - if (IS_ROOT(ret) && !ret->d_fsdata && - !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { - ret->d_fsdata = name; + security_d_instantiate(root, inode); + spin_lock(&root->d_lock); + if (IS_ROOT(root) && !root->d_fsdata && + !(root->d_flags & DCACHE_NFSFS_RENAMED)) { + root->d_fsdata = name; name = NULL; } - spin_unlock(&ret->d_lock); -out: - kfree(name); + spin_unlock(&root->d_lock); + fc->root = root; + error = 0; + +out_fattr: nfs_free_fattr(fsinfo.fattr); - return ret; +out_name: + kfree(name); +out: + return error; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b0b4b9f303fd..1309e6f47f3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1061,7 +1061,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct rcu_read_lock(); list_for_each_entry_rcu(pos, &nfsi->open_files, list) { - if (cred != NULL && pos->cred != cred) + if (cred != NULL && cred_fscmp(pos->cred, cred) != 0) continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; @@ -1156,7 +1156,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); - if (status == -ESTALE) { + switch (status) { + case -ETIMEDOUT: + /* A soft timeout occurred. Use cached information? */ + if (server->flags & NFS_MOUNT_SOFTREVAL) + status = 0; + break; + case -ESTALE: nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 24a65da58aa9..f80c47d5ff27 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -4,17 +4,19 @@ */ #include "nfs4_fs.h" -#include <linux/mount.h> +#include <linux/fs_context.h> #include <linux/security.h> #include <linux/crc32.h> +#include <linux/sunrpc/addr.h> #include <linux/nfs_page.h> #include <linux/wait_bit.h> -#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) +#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations nfs_export_ops; struct nfs_string; +struct nfs_pageio_descriptor; static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) { @@ -31,17 +33,14 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) return 1; } -struct nfs_clone_mount { - const struct super_block *sb; - const struct dentry *dentry; - struct nfs_fh *fh; - struct nfs_fattr *fattr; - char *hostname; - char *mnt_path; - struct sockaddr *addr; - size_t addrlen; - rpc_authflavor_t authflavor; -}; +static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) +{ + if (!(NFS_SB(dentry->d_sb)->flags & NFS_MOUNT_SOFTREVAL)) + return false; + if (!d_is_positive(dentry) || !NFS_FH(d_inode(dentry))->size) + return false; + return true; +} /* * Note: RFC 1813 doesn't limit the number of auth flavors that @@ -82,12 +81,16 @@ struct nfs_client_initdata { /* * In-kernel mount arguments */ -struct nfs_parsed_mount_data { - int flags; +struct nfs_fs_context { + bool internal; + bool skip_reconfig_option_check; + bool need_mount; + bool sloppy; + unsigned int flags; /* NFS{,4}_MOUNT_* flags */ unsigned int rsize, wsize; unsigned int timeo, retrans; - unsigned int acregmin, acregmax, - acdirmin, acdirmax; + unsigned int acregmin, acregmax; + unsigned int acdirmin, acdirmax; unsigned int namlen; unsigned int options; unsigned int bsize; @@ -97,10 +100,14 @@ struct nfs_parsed_mount_data { unsigned int version; unsigned int minorversion; char *fscache_uniq; - bool need_mount; + unsigned short protofamily; + unsigned short mountfamily; struct { - struct sockaddr_storage address; + union { + struct sockaddr address; + struct sockaddr_storage _address; + }; size_t addrlen; char *hostname; u32 version; @@ -109,19 +116,41 @@ struct nfs_parsed_mount_data { } mount_server; struct { - struct sockaddr_storage address; + union { + struct sockaddr address; + struct sockaddr_storage _address; + }; size_t addrlen; char *hostname; char *export_path; int port; unsigned short protocol; unsigned short nconnect; + unsigned short export_path_len; } nfs_server; - void *lsm_opts; - struct net *net; + struct nfs_fh *mntfh; + struct nfs_server *server; + struct nfs_subversion *nfs_mod; + + /* Information for a cloned mount. */ + struct nfs_clone_mount { + struct super_block *sb; + struct dentry *dentry; + struct nfs_fattr *fattr; + unsigned int inherited_bsize; + } clone_data; }; +#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__) +#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) +#define nfs_warnf(fc, fmt, ...) warnf(fc, fmt, ## __VA_ARGS__) + +static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) +{ + return fc->fs_private; +} + /* mount_clnt.c */ struct nfs_mount_request { struct sockaddr *sap; @@ -137,14 +166,6 @@ struct nfs_mount_request { struct net *net; }; -struct nfs_mount_info { - void (*fill_super)(struct super_block *, struct nfs_mount_info *); - int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); - struct nfs_parsed_mount_data *parsed; - struct nfs_clone_mount *cloned; - struct nfs_fh *mntfh; -}; - extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); @@ -170,13 +191,9 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); -extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, - struct nfs_subversion *); -extern struct nfs_server *nfs4_create_server( - struct nfs_mount_info *, - struct nfs_subversion *); -extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, - struct nfs_fh *); +extern struct nfs_server *nfs_create_server(struct fs_context *); +extern struct nfs_server *nfs4_create_server(struct fs_context *); +extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, struct sockaddr *sap, size_t salen, struct net *net); @@ -227,7 +244,9 @@ static inline void nfs_fs_proc_exit(void) extern const struct svc_version nfs4_callback_version1; extern const struct svc_version nfs4_callback_version4; -struct nfs_pageio_descriptor; +/* fs_context.c */ +extern struct file_system_type nfs_fs_type; + /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); @@ -387,23 +406,10 @@ extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode); /* super.c */ extern const struct super_operations nfs_sops; -extern struct file_system_type nfs_fs_type; -extern struct file_system_type nfs_xdev_fs_type; -#if IS_ENABLED(CONFIG_NFS_V4) -extern struct file_system_type nfs4_referral_fs_type; -#endif bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); -struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, - struct nfs_subversion *); -int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, - struct nfs_mount_info *, struct nfs_subversion *); -struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); -struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, - const char *, struct nfs_mount_info *); +int nfs_try_get_tree(struct fs_context *); +int nfs_get_tree_common(struct fs_context *); void nfs_kill_super(struct super_block *); -void nfs_fill_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; @@ -430,18 +436,12 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); -struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *); -struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, - struct nfs_fattr *, rpc_authflavor_t); +int nfs_submount(struct fs_context *, struct nfs_server *); +int nfs_do_submount(struct fs_context *); /* getroot.c */ -extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, - const char *); +extern int nfs_get_root(struct super_block *s, struct fs_context *fc); #if IS_ENABLED(CONFIG_NFS_V4) -extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, - const char *); - extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool); #endif @@ -460,7 +460,7 @@ int nfs_show_options(struct seq_file *, struct dentry *); int nfs_show_devname(struct seq_file *, struct dentry *); int nfs_show_path(struct seq_file *, struct dentry *); int nfs_show_stats(struct seq_file *, struct dentry *); -int nfs_remount(struct super_block *sb, int *flags, char *raw_data); +int nfs_reconfigure(struct fs_context *); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, @@ -706,9 +706,9 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) } /* - * Convert a struct timespec into a 64-bit change attribute + * Convert a struct timespec64 into a 64-bit change attribute * - * This does approximately the same thing as timespec_to_ns(), + * This does approximately the same thing as timespec64_to_ns(), * but for calculation efficiency, we multiply the seconds by * 1024*1024*1024. */ @@ -777,3 +777,16 @@ static inline bool nfs_error_is_fatal_on_server(int err) } return nfs_error_is_fatal(err); } + +/* + * Select between a default port value and a user-specified port value. + * If a zero value is set, then autobind will be used. + */ +static inline void nfs_set_port(struct sockaddr *sap, int *port, + const unsigned short default_port) +{ + if (*port == NFS_UNSPEC_PORT) + *port = default_port; + + rpc_set_port(sap, *port); +} diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index cb7c10e9721e..35c8cb2d7637 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -29,9 +29,7 @@ */ #define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN)) #define MNT_status_sz (1) -#define MNT_fhs_status_sz (1) #define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE) -#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE)) #define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS) /* diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 5e0e9d29f5c5..ad6077404947 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -19,6 +19,7 @@ #include <linux/vfs.h> #include <linux/sunrpc/gss_api.h> #include "internal.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -139,34 +140,65 @@ EXPORT_SYMBOL_GPL(nfs_path); */ struct vfsmount *nfs_d_automount(struct path *path) { - struct vfsmount *mnt; + struct nfs_fs_context *ctx; + struct fs_context *fc; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct nfs_server *server = NFS_SERVER(d_inode(path->dentry)); - struct nfs_fh *fh = NULL; - struct nfs_fattr *fattr = NULL; + struct nfs_client *client = server->nfs_client; + int ret; if (IS_ROOT(path->dentry)) return ERR_PTR(-ESTALE); - mnt = ERR_PTR(-ENOMEM); - fh = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); - if (fh == NULL || fattr == NULL) - goto out; + /* Open a new filesystem context, transferring parameters from the + * parent superblock, including the network namespace. + */ + fc = fs_context_for_submount(&nfs_fs_type, path->dentry); + if (IS_ERR(fc)) + return ERR_CAST(fc); - mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr); + ctx = nfs_fc2context(fc); + ctx->clone_data.dentry = path->dentry; + ctx->clone_data.sb = path->dentry->d_sb; + ctx->clone_data.fattr = nfs_alloc_fattr(); + if (!ctx->clone_data.fattr) + goto out_fc; + + if (fc->net_ns != client->cl_net) { + put_net(fc->net_ns); + fc->net_ns = get_net(client->cl_net); + } + + /* for submounts we want the same server; referrals will reassign */ + memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen); + ctx->nfs_server.addrlen = client->cl_addrlen; + ctx->nfs_server.port = server->port; + + ctx->version = client->rpc_ops->version; + ctx->minorversion = client->cl_minorversion; + ctx->nfs_mod = client->cl_nfs_mod; + __module_get(ctx->nfs_mod->owner); + + ret = client->rpc_ops->submount(fc, server); + if (ret < 0) { + mnt = ERR_PTR(ret); + goto out_fc; + } + + up_write(&fc->root->d_sb->s_umount); + mnt = vfs_create_mount(fc); if (IS_ERR(mnt)) - goto out; + goto out_fc; if (nfs_mountpoint_expiry_timeout < 0) - goto out; + goto out_fc; mntget(mnt); /* prevent immediate expiration */ mnt_set_expiry(mnt, &nfs_automount_list); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); -out: - nfs_free_fattr(fattr); - nfs_free_fhandle(fh); +out_fc: + put_fs_context(fc); return mnt; } @@ -213,16 +245,6 @@ void nfs_release_automount_timer(void) cancel_delayed_work(&nfs_automount_task); } -/* - * Clone a mountpoint of the appropriate type - */ -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, - const char *devname, - struct nfs_clone_mount *mountdata) -{ - return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata); -} - /** * nfs_do_submount - set up mountpoint when crossing a filesystem boundary * @dentry: parent directory @@ -231,46 +253,62 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, * @authflavor: security flavor to use when performing the mount * */ -struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr, rpc_authflavor_t authflavor) +int nfs_do_submount(struct fs_context *fc) { - struct nfs_clone_mount mountdata = { - .sb = dentry->d_sb, - .dentry = dentry, - .fh = fh, - .fattr = fattr, - .authflavor = authflavor, - }; - struct vfsmount *mnt; - char *page = (char *) __get_free_page(GFP_USER); - char *devname; - - if (page == NULL) - return ERR_PTR(-ENOMEM); - - devname = nfs_devname(dentry, page, PAGE_SIZE); - if (IS_ERR(devname)) - mnt = ERR_CAST(devname); - else - mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata); - - free_page((unsigned long)page); - return mnt; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry = ctx->clone_data.dentry; + struct nfs_server *server; + char *buffer, *p; + int ret; + + /* create a new volume representation */ + server = ctx->nfs_mod->rpc_ops->clone_server(NFS_SB(ctx->clone_data.sb), + ctx->mntfh, + ctx->clone_data.fattr, + ctx->selected_flavor); + + if (IS_ERR(server)) + return PTR_ERR(server); + + ctx->server = server; + + buffer = kmalloc(4096, GFP_USER); + if (!buffer) + return -ENOMEM; + + ctx->internal = true; + ctx->clone_data.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits; + + p = nfs_devname(dentry, buffer, 4096); + if (IS_ERR(p)) { + nfs_errorf(fc, "NFS: Couldn't determine submount pathname"); + ret = PTR_ERR(p); + } else { + ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p); + if (!ret) + ret = vfs_get_tree(fc); + } + kfree(buffer); + return ret; } EXPORT_SYMBOL_GPL(nfs_do_submount); -struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, - struct nfs_fh *fh, struct nfs_fattr *fattr) +int nfs_submount(struct fs_context *fc, struct nfs_server *server) { - int err; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry = ctx->clone_data.dentry; struct dentry *parent = dget_parent(dentry); + int err; /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, fh, fattr, NULL); + err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, + ctx->mntfh, ctx->clone_data.fattr, + NULL); dput(parent); if (err != 0) - return ERR_PTR(err); + return err; - return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); + ctx->selected_flavor = server->client->cl_auth->au_flavor; + return nfs_do_submount(fc); } EXPORT_SYMBOL_GPL(nfs_submount); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index d94c7abdf25a..f6676af37d5d 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -360,17 +360,17 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr, else *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); - if (attr->ia_valid & ATTR_ATIME_SET) { + if (attr->ia_valid & ATTR_ATIME_SET) p = xdr_encode_time(p, &attr->ia_atime); - } else if (attr->ia_valid & ATTR_ATIME) { + else if (attr->ia_valid & ATTR_ATIME) p = xdr_encode_current_server_time(p, &attr->ia_atime); - } else + else p = xdr_time_not_set(p); - if (attr->ia_valid & ATTR_MTIME_SET) { + if (attr->ia_valid & ATTR_MTIME_SET) xdr_encode_time(p, &attr->ia_mtime); - } else if (attr->ia_valid & ATTR_MTIME) { + else if (attr->ia_valid & ATTR_MTIME) xdr_encode_current_server_time(p, &attr->ia_mtime); - } else + else xdr_time_not_set(p); } diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index f82e11c4cb56..1b950b66b3bb 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -27,7 +27,7 @@ static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, #endif /* CONFIG_NFS_V3_ACL */ /* nfs3client.c */ -struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); +struct nfs_server *nfs3_create_server(struct fs_context *); struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 223904bc40a7..5601e47360c2 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -46,10 +46,10 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server) } #endif -struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs3_create_server(struct fs_context *fc) { - struct nfs_server *server = nfs_create_server(mount_info, nfs_mod); + struct nfs_server *server = nfs_create_server(fc); + /* Create a client RPC handle for the NFS v3 ACL management interface */ if (!IS_ERR(server)) nfs_init_server_aclclient(server); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 9eb2f1a503ab..a46d1d5d16d8 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -110,10 +110,15 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = fattr, }; int status; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, task_flags); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -140,23 +145,23 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) { + nfs_setattr_update_inode(inode, sattr, fattr); if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - nfs_setattr_update_inode(inode, sattr, fattr); } dprintk("NFS reply setattr: %d\n", status); return status; } static int -nfs3_proc_lookup(struct inode *dir, const struct qstr *name, +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs3_diropargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len + .name = dentry->d_name.name, + .len = dentry->d_name.len }; struct nfs3_diropres res = { .fh = fhandle, @@ -168,20 +173,25 @@ nfs3_proc_lookup(struct inode *dir, const struct qstr *name, .rpc_resp = &res, }; int status; + unsigned short task_flags = 0; - dprintk("NFS call lookup %s\n", name->name); + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; + + dprintk("NFS call lookup %pd2\n", dentry); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) return -ENOMEM; nfs_fattr_init(fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); nfs_refresh_inode(dir, res.dir_attr); if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_argp = fhandle; msg.rpc_resp = fattr; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); } nfs_free_fattr(res.dir_attr); dprintk("NFS reply lookup: %d\n", status); @@ -990,7 +1000,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .nlmclnt_ops = &nlmclnt_fl_close_lock_ops, .getroot = nfs3_proc_get_root, .submount = nfs_submount, - .try_mount = nfs_try_mount, + .try_get_tree = nfs_try_get_tree, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 927eb680f161..69971f6c840d 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2334,6 +2334,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, void *data) { struct nfs_commitres *result = data; + struct nfs_writeverf *verf = result->verf; enum nfs_stat status; int error; @@ -2346,7 +2347,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, result->op_status = status; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, &result->verf->verifier); + error = decode_writeverf3(xdr, &verf->verifier); + if (!error) + verf->committed = NFS_FILE_SYNC; out: return error; out_status: diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 1fe83e0f663e..e2ae54b35dfe 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -61,8 +61,11 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context, lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } res.falloc_fattr = nfs_alloc_fattr(); if (!res.falloc_fattr) @@ -287,8 +290,11 @@ static ssize_t _nfs42_proc_copy(struct file *src, } else { status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, src_lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } } status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping, pos_src, pos_src + (loff_t)count - 1); @@ -297,8 +303,11 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs_sync_inode(dst_inode); if (status) @@ -334,14 +343,14 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = handle_async_copy(res, dst_server, src_server, src, dst, &args->src_stateid, restart); if (status) - return status; + goto out; } if ((!res->synchronous || !args->sync) && res->write_res.verifier.committed != NFS_FILE_SYNC) { status = process_copy_commit(dst, pos_dst, res); if (status) - return status; + goto out; } truncate_pagecache_range(dst_inode, pos_dst, @@ -546,8 +555,11 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, FMODE_READ); nfs_put_lock_context(l_ctx); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs4_call_sync(src_server->client, src_server, &msg, &args->cna_seq_args, &res->cnr_seq_res, 0); @@ -618,8 +630,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep, status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context, lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs_filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -994,13 +1009,18 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, src_lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; - + } status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } res.dst_fattr = nfs_alloc_fattr(); if (!res.dst_fattr) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a7a73b1d1fec..8be1ba7c62bb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -268,14 +268,13 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t); -/* super.c */ +/* fs_context.c */ extern struct file_system_type nfs4_fs_type; /* nfs4namespace.c */ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, const struct qstr *); -struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *); +int nfs4_submount(struct fs_context *, struct nfs_server *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); @@ -303,8 +302,10 @@ extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struc extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, struct page *page, const struct cred *); extern int nfs4_proc_fsid_present(struct inode *, const struct cred *); -extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, const struct qstr *, - struct nfs_fh *, struct nfs_fattr *); +extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, + struct dentry *, + struct nfs_fh *, + struct nfs_fattr *); extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern const struct xattr_handler *nfs4_xattr_handlers[]; extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, @@ -446,9 +447,7 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); -extern void nfs4_set_lease_period(struct nfs_client *clp, - unsigned long lease, - unsigned long lastrenewed); +extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease); /* nfs4state.c */ @@ -526,7 +525,6 @@ extern const nfs4_stateid invalid_stateid; /* nfs4super.c */ struct nfs_mount_info; extern struct nfs_subversion nfs_v4; -struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short max_session_cb_slots; @@ -536,6 +534,9 @@ extern bool recover_lost_locks; #define NFS4_CLIENT_ID_UNIQ_LEN (64) extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; +extern int nfs4_try_get_tree(struct fs_context *); +extern int nfs4_get_referral_tree(struct fs_context *); + /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL int nfs4_register_sysctl(void); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 460d6251c405..0cd767e5c977 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1055,66 +1055,64 @@ out: /* * Create a version 4 volume record */ -static int nfs4_init_server(struct nfs_server *server, - struct nfs_parsed_mount_data *data) +static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; int error; - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); + nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, + ctx->timeo, ctx->retrans); /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->options = data->options; - server->auth_info = data->auth_info; + server->flags = ctx->flags; + server->options = ctx->options; + server->auth_info = ctx->auth_info; /* Use the first specified auth flavor. If this flavor isn't * allowed by the server, use the SECINFO path to try the * other specified flavors */ - if (data->auth_info.flavor_len >= 1) - data->selected_flavor = data->auth_info.flavors[0]; + if (ctx->auth_info.flavor_len >= 1) + ctx->selected_flavor = ctx->auth_info.flavors[0]; else - data->selected_flavor = RPC_AUTH_UNIX; + ctx->selected_flavor = RPC_AUTH_UNIX; /* Get a client record */ error = nfs4_set_client(server, - data->nfs_server.hostname, - (const struct sockaddr *)&data->nfs_server.address, - data->nfs_server.addrlen, - data->client_address, - data->nfs_server.protocol, - &timeparms, - data->minorversion, - data->nfs_server.nconnect, - data->net); + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, + ctx->client_address, + ctx->nfs_server.protocol, + &timeparms, + ctx->minorversion, + ctx->nfs_server.nconnect, + fc->net_ns); if (error < 0) return error; - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); + if (ctx->rsize) + server->rsize = nfs_block_size(ctx->rsize, NULL); + if (ctx->wsize) + server->wsize = nfs_block_size(ctx->wsize, NULL); - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; - server->port = data->nfs_server.port; + server->acregmin = ctx->acregmin * HZ; + server->acregmax = ctx->acregmax * HZ; + server->acdirmin = ctx->acdirmin * HZ; + server->acdirmax = ctx->acdirmax * HZ; + server->port = ctx->nfs_server.port; return nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); + ctx->selected_flavor); } /* * Create a version 4 volume record * - keyed on server and FSID */ -/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh)*/ -struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs4_create_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_server *server; bool auth_probe; int error; @@ -1125,14 +1123,14 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, server->cred = get_cred(current_cred()); - auth_probe = mount_info->parsed->auth_info.flavor_len < 1; + auth_probe = ctx->auth_info.flavor_len < 1; /* set up the general RPC client */ - error = nfs4_init_server(server, mount_info->parsed); + error = nfs4_init_server(server, fc); if (error < 0) goto error; - error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe); + error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe); if (error < 0) goto error; @@ -1146,9 +1144,9 @@ error: /* * Create an NFS4 referral server record */ -struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, - struct nfs_fh *mntfh) +struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_client *parent_client; struct nfs_server *server, *parent_server; bool auth_probe; @@ -1158,7 +1156,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (!server) return ERR_PTR(-ENOMEM); - parent_server = NFS_SB(data->sb); + parent_server = NFS_SB(ctx->clone_data.sb); parent_client = parent_server->nfs_client; server->cred = get_cred(parent_server->cred); @@ -1168,10 +1166,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Get a client representation */ #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) - rpc_set_port(data->addr, NFS_RDMA_PORT); - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, + rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT); + error = nfs4_set_client(server, + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, parent_client->cl_ipaddr, XPRT_TRANSPORT_RDMA, parent_server->client->cl_timeout, @@ -1182,10 +1181,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, goto init_server; #endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */ - rpc_set_port(data->addr, NFS_PORT); - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, + rpc_set_port(&ctx->nfs_server.address, NFS_PORT); + error = nfs4_set_client(server, + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, parent_client->cl_ipaddr, XPRT_TRANSPORT_TCP, parent_server->client->cl_timeout, @@ -1198,13 +1198,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) init_server: #endif - error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); + error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, + ctx->selected_flavor); if (error < 0) goto error; auth_probe = parent_server->auth_info.flavor_len < 1; - error = nfs4_server_common_setup(server, mntfh, auth_probe); + error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe); if (error < 0) goto error; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 620de905cba9..be4eb720d5b6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -7,6 +7,7 @@ #include <linux/fs.h> #include <linux/file.h> #include <linux/falloc.h> +#include <linux/mount.h> #include <linux/nfs_fs.h> #include "delegation.h" #include "internal.h" diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 2e460c33ae48..84026e7b8a5f 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -8,6 +8,7 @@ * NFSv4 namespace */ +#include <linux/module.h> #include <linux/dcache.h> #include <linux/mount.h> #include <linux/namei.h> @@ -21,37 +22,64 @@ #include <linux/inet.h> #include "internal.h" #include "nfs4_fs.h" +#include "nfs.h" #include "dns_resolve.h" #define NFSDBG_FACILITY NFSDBG_VFS /* + * Work out the length that an NFSv4 path would render to as a standard posix + * path, with a leading slash but no terminating slash. + */ +static ssize_t nfs4_pathname_len(const struct nfs4_pathname *pathname) +{ + ssize_t len = 0; + int i; + + for (i = 0; i < pathname->ncomponents; i++) { + const struct nfs4_string *component = &pathname->components[i]; + + if (component->len > NAME_MAX) + goto too_long; + len += 1 + component->len; /* Adding "/foo" */ + if (len > PATH_MAX) + goto too_long; + } + return len; + +too_long: + return -ENAMETOOLONG; +} + +/* * Convert the NFSv4 pathname components into a standard posix path. - * - * Note that the resulting string will be placed at the end of the buffer */ -static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, - char *buffer, ssize_t buflen) +static char *nfs4_pathname_string(const struct nfs4_pathname *pathname, + unsigned short *_len) { - char *end = buffer + buflen; - int n; + ssize_t len; + char *buf, *p; + int i; + + len = nfs4_pathname_len(pathname); + if (len < 0) + return ERR_PTR(len); + *_len = len; + + p = buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < pathname->ncomponents; i++) { + const struct nfs4_string *component = &pathname->components[i]; - *--end = '\0'; - buflen--; - - n = pathname->ncomponents; - while (--n >= 0) { - const struct nfs4_string *component = &pathname->components[n]; - buflen -= component->len + 1; - if (buflen < 0) - goto Elong; - end -= component->len; - memcpy(end, component->data, component->len); - *--end = '/'; + *p++ = '/'; + memcpy(p, component->data, component->len); + p += component->len; } - return end; -Elong: - return ERR_PTR(-ENAMETOOLONG); + + *p = 0; + return buf; } /* @@ -100,21 +128,36 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) */ static int nfs4_validate_fspath(struct dentry *dentry, const struct nfs4_fs_locations *locations, - char *page, char *page2) + struct nfs_fs_context *ctx) { - const char *path, *fs_path; + const char *path; + char *fs_path; + unsigned short len; + char *buf; + int n; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (IS_ERR(path)) + buf = kmalloc(4096, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + path = nfs4_path(dentry, buf, 4096); + if (IS_ERR(path)) { + kfree(buf); return PTR_ERR(path); + } - fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); - if (IS_ERR(fs_path)) + fs_path = nfs4_pathname_string(&locations->fs_path, &len); + if (IS_ERR(fs_path)) { + kfree(buf); return PTR_ERR(fs_path); + } - if (strncmp(path, fs_path, strlen(fs_path)) != 0) { + n = strncmp(path, fs_path, len); + kfree(buf); + kfree(fs_path); + if (n != 0) { dprintk("%s: path %s does not begin with fsroot %s\n", - __func__, path, fs_path); + __func__, path, ctx->nfs_server.export_path); return -ENOENT; } @@ -236,55 +279,77 @@ out: return new; } -static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, - char *page, char *page2, - const struct nfs4_fs_location *location) +static int try_location(struct fs_context *fc, + const struct nfs4_fs_location *location) { - const size_t addr_bufsize = sizeof(struct sockaddr_storage); - struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client); - struct vfsmount *mnt = ERR_PTR(-ENOENT); - char *mnt_path; - unsigned int maxbuflen; - unsigned int s; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + unsigned int len, s; + char *export_path, *source, *p; + int ret = -ENOENT; + + /* Allocate a buffer big enough to hold any of the hostnames plus a + * terminating char and also a buffer big enough to hold the hostname + * plus a colon plus the path. + */ + len = 0; + for (s = 0; s < location->nservers; s++) { + const struct nfs4_string *buf = &location->servers[s]; + if (buf->len > len) + len = buf->len; + } - mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); - if (IS_ERR(mnt_path)) - return ERR_CAST(mnt_path); - mountdata->mnt_path = mnt_path; - maxbuflen = mnt_path - 1 - page2; + kfree(ctx->nfs_server.hostname); + ctx->nfs_server.hostname = kmalloc(len + 1, GFP_KERNEL); + if (!ctx->nfs_server.hostname) + return -ENOMEM; - mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL); - if (mountdata->addr == NULL) - return ERR_PTR(-ENOMEM); + export_path = nfs4_pathname_string(&location->rootpath, + &ctx->nfs_server.export_path_len); + if (IS_ERR(export_path)) + return PTR_ERR(export_path); + + ctx->nfs_server.export_path = export_path; + + source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1, + GFP_KERNEL); + if (!source) + return -ENOMEM; + kfree(fc->source); + fc->source = source; for (s = 0; s < location->nservers; s++) { const struct nfs4_string *buf = &location->servers[s]; - if (buf->len <= 0 || buf->len >= maxbuflen) - continue; - if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) continue; - mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize, net); - if (mountdata->addrlen == 0) + ctx->nfs_server.addrlen = + nfs_parse_server_name(buf->data, buf->len, + &ctx->nfs_server.address, + sizeof(ctx->nfs_server._address), + fc->net_ns); + if (ctx->nfs_server.addrlen == 0) continue; - memcpy(page2, buf->data, buf->len); - page2[buf->len] = '\0'; - mountdata->hostname = page2; + rpc_set_port(&ctx->nfs_server.address, NFS_PORT); - snprintf(page, PAGE_SIZE, "%s:%s", - mountdata->hostname, - mountdata->mnt_path); + memcpy(ctx->nfs_server.hostname, buf->data, buf->len); + ctx->nfs_server.hostname[buf->len] = '\0'; - mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata); - if (!IS_ERR(mnt)) - break; + p = source; + memcpy(p, buf->data, buf->len); + p += buf->len; + *p++ = ':'; + memcpy(p, ctx->nfs_server.export_path, ctx->nfs_server.export_path_len); + p += ctx->nfs_server.export_path_len; + *p = 0; + + ret = nfs4_get_referral_tree(fc); + if (ret == 0) + return 0; } - kfree(mountdata->addr); - return mnt; + + return ret; } /** @@ -293,38 +358,23 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, * @locations: array of NFSv4 server location information * */ -static struct vfsmount *nfs_follow_referral(struct dentry *dentry, - const struct nfs4_fs_locations *locations) +static int nfs_follow_referral(struct fs_context *fc, + const struct nfs4_fs_locations *locations) { - struct vfsmount *mnt = ERR_PTR(-ENOENT); - struct nfs_clone_mount mountdata = { - .sb = dentry->d_sb, - .dentry = dentry, - .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor, - }; - char *page = NULL, *page2 = NULL; + struct nfs_fs_context *ctx = nfs_fc2context(fc); int loc, error; if (locations == NULL || locations->nlocations <= 0) - goto out; - - dprintk("%s: referral at %pd2\n", __func__, dentry); - - page = (char *) __get_free_page(GFP_USER); - if (!page) - goto out; + return -ENOENT; - page2 = (char *) __get_free_page(GFP_USER); - if (!page2) - goto out; + dprintk("%s: referral at %pd2\n", __func__, ctx->clone_data.dentry); /* Ensure fs path is a prefix of current dentry path */ - error = nfs4_validate_fspath(dentry, locations, page, page2); - if (error < 0) { - mnt = ERR_PTR(error); - goto out; - } + error = nfs4_validate_fspath(ctx->clone_data.dentry, locations, ctx); + if (error < 0) + return error; + error = -ENOENT; for (loc = 0; loc < locations->nlocations; loc++) { const struct nfs4_fs_location *location = &locations->locations[loc]; @@ -332,15 +382,12 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, location->rootpath.ncomponents == 0) continue; - mnt = try_location(&mountdata, page, page2, location); - if (!IS_ERR(mnt)) - break; + error = try_location(fc, location); + if (error == 0) + return 0; } -out: - free_page((unsigned long) page); - free_page((unsigned long) page2); - return mnt; + return error; } /* @@ -348,71 +395,72 @@ out: * @dentry - dentry of referral * */ -static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) +static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client) { - struct vfsmount *mnt = ERR_PTR(-ENOMEM); - struct dentry *parent; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry, *parent; struct nfs4_fs_locations *fs_locations = NULL; struct page *page; - int err; + int err = -ENOMEM; /* BUG_ON(IS_ROOT(dentry)); */ page = alloc_page(GFP_KERNEL); - if (page == NULL) - return mnt; + if (!page) + return -ENOMEM; fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (fs_locations == NULL) + if (!fs_locations) goto out_free; /* Get locations */ - mnt = ERR_PTR(-ENOENT); - + dentry = ctx->clone_data.dentry; parent = dget_parent(dentry); dprintk("%s: getting locations for %pd2\n", __func__, dentry); err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page); dput(parent); - if (err != 0 || - fs_locations->nlocations <= 0 || + if (err != 0) + goto out_free_2; + + err = -ENOENT; + if (fs_locations->nlocations <= 0 || fs_locations->fs_path.ncomponents <= 0) - goto out_free; + goto out_free_2; - mnt = nfs_follow_referral(dentry, fs_locations); + err = nfs_follow_referral(fc, fs_locations); +out_free_2: + kfree(fs_locations); out_free: __free_page(page); - kfree(fs_locations); - return mnt; + return err; } -struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, - struct nfs_fh *fh, struct nfs_fattr *fattr) +int nfs4_submount(struct fs_context *fc, struct nfs_server *server) { - rpc_authflavor_t flavor = server->client->cl_auth->au_flavor; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry = ctx->clone_data.dentry; struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); - const struct qstr *name = &dentry->d_name; struct rpc_clnt *client; - struct vfsmount *mnt; + int ret; /* Look it up again to get its attributes and sec flavor */ - client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); + client = nfs4_proc_lookup_mountpoint(dir, dentry, ctx->mntfh, + ctx->clone_data.fattr); dput(parent); if (IS_ERR(client)) - return ERR_CAST(client); + return PTR_ERR(client); - if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - mnt = nfs_do_refmount(client, dentry); - goto out; + ctx->selected_flavor = client->cl_auth->au_flavor; + if (ctx->clone_data.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { + ret = nfs_do_refmount(fc, client); + } else { + ret = nfs_do_submount(fc); } - if (client->cl_auth->au_flavor != flavor) - flavor = client->cl_auth->au_flavor; - mnt = nfs_do_submount(dentry, fh, fattr, flavor); -out: rpc_shutdown_client(client); - return mnt; + return ret; } /* @@ -453,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, rpc_set_port(sap, NFS_PORT); error = -ENOMEM; - hostname = kstrndup(buf->data, buf->len, GFP_KERNEL); + hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL); if (hostname == NULL) break; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 76d37161409a..95d07a3dc5d1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1097,11 +1097,12 @@ static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup) return ret; } -static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res) +static int nfs4_do_call_sync(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + unsigned short task_flags) { struct nfs_client *clp = server->nfs_client; struct nfs4_call_sync_data data = { @@ -1113,12 +1114,23 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .rpc_client = clnt, .rpc_message = msg, .callback_ops = clp->cl_mvops->call_sync_ops, - .callback_data = &data + .callback_data = &data, + .flags = task_flags, }; return nfs4_call_sync_custom(&task_setup); } +static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res) +{ + return nfs4_do_call_sync(clnt, server, msg, args, res, 0); +} + + int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -3187,6 +3199,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } + if (status == -NFS4ERR_EXPIRED) { + nfs4_schedule_lease_recovery(server->nfs_client); + exception.retry = 1; + continue; + } if (status == -EAGAIN) { /* We must have found a delegation */ exception.retry = 1; @@ -3239,6 +3256,8 @@ static int _nfs4_do_setattr(struct inode *inode, nfs_put_lock_context(l_ctx); if (status == -EIO) return -EBADF; + else if (status == -EAGAIN) + goto zero_stateid; } else { zero_stateid: nfs4_stateid_copy(&arg->stateid, &zero_stateid); @@ -4064,11 +4083,18 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); nfs_fattr_init(fattr); - return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + return nfs4_do_call_sync(server->client, server, &msg, + &args.seq_args, &res.seq_res, task_flags); } int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, @@ -4156,7 +4182,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, + struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_server *server = NFS_SERVER(dir); @@ -4164,7 +4190,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct nfs4_lookup_arg args = { .bitmask = server->attr_bitmask, .dir_fh = NFS_FH(dir), - .name = name, + .name = &dentry->d_name, }; struct nfs4_lookup_res res = { .server = server, @@ -4177,13 +4203,20 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; args.bitmask = nfs4_bitmask(server, label); nfs_fattr_init(fattr); - dprintk("NFS call lookup %s\n", name->name); - status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); + dprintk("NFS call lookup %pd2\n", dentry); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + status = nfs4_do_call_sync(clnt, server, &msg, + &args.seq_args, &res.seq_res, task_flags); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -4197,16 +4230,17 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) } static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, + struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs4_exception exception = { .interruptible = true, }; struct rpc_clnt *client = *clnt; + const struct qstr *name = &dentry->d_name; int err; do { - err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label); + err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label); trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: @@ -4241,14 +4275,14 @@ out: return err; } -static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name, +static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -4257,13 +4291,13 @@ static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name, } struct rpc_clnt * -nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name, +nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct rpc_clnt *client = NFS_CLIENT(dir); int status; - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL); if (status < 0) return ERR_PTR(status); return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; @@ -5019,16 +5053,13 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str struct nfs4_exception exception = { .interruptible = true, }; - unsigned long now = jiffies; int err; do { err = _nfs4_do_fsinfo(server, fhandle, fsinfo); trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); if (err == 0) { - nfs4_set_lease_period(server->nfs_client, - fsinfo->lease_time * HZ, - now); + nfs4_set_lease_period(server->nfs_client, fsinfo->lease_time * HZ); break; } err = nfs4_handle_exception(server, err, &exception); @@ -5582,10 +5613,9 @@ out: */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, }; + struct page **pages; struct nfs_getaclargs args = { .fh = NFS_FH(inode), - .acl_pages = pages, .acl_len = buflen, }; struct nfs_getaclres res = { @@ -5596,11 +5626,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; + unsigned int npages; int ret = -ENOMEM, i; + struct nfs_server *server = NFS_SERVER(inode); - if (npages > ARRAY_SIZE(pages)) - return -ERANGE; + if (buflen == 0) + buflen = server->rsize; + + npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; + pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS); + if (!pages) + return -ENOMEM; + + args.acl_pages = pages; for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -5646,6 +5684,7 @@ out_free: __free_page(pages[i]); if (res.acl_scratch) __free_page(res.acl_scratch); + kfree(pages); return ret; } @@ -6084,6 +6123,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .callback_data = &setclientid, .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; + unsigned long now = jiffies; int status; /* nfs_client_id4 */ @@ -6116,6 +6156,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } + + if (status == 0) + do_renew_lease(clp, now); out: trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); @@ -6859,7 +6902,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ case -NFS4ERR_STALE_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; nfs4_schedule_lease_recovery(server->nfs_client); - }; + } } static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type) @@ -8203,6 +8246,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre struct rpc_task *task; struct nfs41_exchange_id_args *argp; struct nfs41_exchange_id_res *resp; + unsigned long now = jiffies; int status; task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL); @@ -8223,6 +8267,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre if (status != 0) goto out; + do_renew_lease(clp, now); + clp->cl_clientid = resp->clientid; clp->cl_exchange_flags = resp->flags; clp->cl_seqid = resp->seqid; @@ -8626,7 +8672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, case -EACCES: case -EAGAIN: goto out; - }; + } clp->cl_seqid++; if (!status) { @@ -10001,7 +10047,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, .submount = nfs4_submount, - .try_mount = nfs4_try_mount, + .try_get_tree = nfs4_try_get_tree, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6ea431b067dd..ff876dda7f06 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -138,15 +138,12 @@ nfs4_kill_renewd(struct nfs_client *clp) * * @clp: pointer to nfs_client * @lease: new value for lease period - * @lastrenewed: time at which lease was last renewed */ void nfs4_set_lease_period(struct nfs_client *clp, - unsigned long lease, - unsigned long lastrenewed) + unsigned long lease) { spin_lock(&clp->cl_lock); clp->cl_lease_time = lease; - clp->cl_last_renewal = lastrenewed; spin_unlock(&clp->cl_lock); /* Cap maximum reconnect timeout at 1/2 lease period */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 34552329233d..f7723d221945 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -92,17 +92,15 @@ static int nfs4_setup_state_renewal(struct nfs_client *clp) { int status; struct nfs_fsinfo fsinfo; - unsigned long now; if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { nfs4_schedule_state_renewal(clp); return 0; } - now = jiffies; status = nfs4_proc_get_lease_time(clp, &fsinfo); if (status == 0) { - nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); + nfs4_set_lease_period(clp, fsinfo.lease_time * HZ); nfs4_schedule_state_renewal(clp); } @@ -766,6 +764,7 @@ void nfs4_put_open_state(struct nfs4_state *state) list_del(&state->open_states); spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); + nfs4_inode_return_delegation_on_close(inode); iput(inode); nfs4_free_open_state(state); nfs4_put_state_owner(owner); @@ -1135,7 +1134,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_MOVED: /* Non-seqid mutating errors */ return; - }; + } /* * Note: no locking needed as we are guaranteed to be first * on the sequence list diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 2c9cbade561a..1475f932d7da 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -4,6 +4,7 @@ */ #include <linux/init.h> #include <linux/module.h> +#include <linux/mount.h> #include <linux/nfs4_mount.h> #include <linux/nfs_fs.h> #include "delegation.h" @@ -18,36 +19,6 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); static void nfs4_evict_inode(struct inode *inode); -static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); - -static struct file_system_type nfs4_remote_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; - -static struct file_system_type nfs4_remote_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; - -struct file_system_type nfs4_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, @@ -61,16 +32,15 @@ static const struct super_operations nfs4_sops = { .show_devname = nfs_show_devname, .show_path = nfs_show_path, .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, }; struct nfs_subversion nfs_v4 = { - .owner = THIS_MODULE, - .nfs_fs = &nfs4_fs_type, - .rpc_vers = &nfs_version4, - .rpc_ops = &nfs_v4_clientops, - .sops = &nfs4_sops, - .xattr = nfs4_xattr_handlers, + .owner = THIS_MODULE, + .nfs_fs = &nfs4_fs_type, + .rpc_vers = &nfs_version4, + .rpc_ops = &nfs_v4_clientops, + .sops = &nfs4_sops, + .xattr = nfs4_xattr_handlers, }; static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -101,53 +71,6 @@ static void nfs4_evict_inode(struct inode *inode) nfs_clear_inode(inode); } -/* - * Get the superblock for the NFS4 root partition - */ -static struct dentry * -nfs4_remote_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *info) -{ - struct nfs_mount_info *mount_info = info; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - mount_info->set_security = nfs_set_sb_security; - - /* Get a volume representation */ - server = nfs4_create_server(mount_info, &nfs_v4); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); - -out: - return mntroot; -} - -static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, - int flags, void *data, const char *hostname) -{ - struct vfsmount *root_mnt; - char *root_devname; - size_t len; - - len = strlen(hostname) + 5; - root_devname = kmalloc(len, GFP_KERNEL); - if (root_devname == NULL) - return ERR_PTR(-ENOMEM); - /* Does hostname needs to be enclosed in brackets? */ - if (strchr(hostname, ':')) - snprintf(root_devname, len, "[%s]:/", hostname); - else - snprintf(root_devname, len, "%s:/", hostname); - root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); - kfree(root_devname); - return root_mnt; -} - struct nfs_referral_count { struct list_head list; const struct task_struct *task; @@ -214,111 +137,125 @@ static void nfs_referral_loop_unprotect(void) kfree(p); } -static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, - const char *export_path) +static int do_nfs4_mount(struct nfs_server *server, + struct fs_context *fc, + const char *hostname, + const char *export_path) { + struct nfs_fs_context *root_ctx; + struct fs_context *root_fc; + struct vfsmount *root_mnt; struct dentry *dentry; - int err; + size_t len; + int ret; - if (IS_ERR(root_mnt)) - return ERR_CAST(root_mnt); + struct fs_parameter param = { + .key = "source", + .type = fs_value_is_string, + .dirfd = -1, + }; - err = nfs_referral_loop_protect(); - if (err) { - mntput(root_mnt); - return ERR_PTR(err); + if (IS_ERR(server)) + return PTR_ERR(server); + + root_fc = vfs_dup_fs_context(fc); + if (IS_ERR(root_fc)) { + nfs_free_server(server); + return PTR_ERR(root_fc); } + kfree(root_fc->source); + root_fc->source = NULL; - dentry = mount_subtree(root_mnt, export_path); - nfs_referral_loop_unprotect(); + root_ctx = nfs_fc2context(root_fc); + root_ctx->internal = true; + root_ctx->server = server; + /* We leave export_path unset as it's not used to find the root. */ - return dentry; -} + len = strlen(hostname) + 5; + param.string = kmalloc(len, GFP_KERNEL); + if (param.string == NULL) { + put_fs_context(root_fc); + return -ENOMEM; + } -struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) -{ - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; - struct nfs_parsed_mount_data *data = mount_info->parsed; + /* Does hostname needs to be enclosed in brackets? */ + if (strchr(hostname, ':')) + param.size = snprintf(param.string, len, "[%s]:/", hostname); + else + param.size = snprintf(param.string, len, "%s:/", hostname); + ret = vfs_parse_fs_param(root_fc, ¶m); + kfree(param.string); + if (ret < 0) { + put_fs_context(root_fc); + return ret; + } + root_mnt = fc_mount(root_fc); + put_fs_context(root_fc); + + if (IS_ERR(root_mnt)) + return PTR_ERR(root_mnt); - dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + ret = nfs_referral_loop_protect(); + if (ret) { + mntput(root_mnt); + return ret; + } - export_path = data->nfs_server.export_path; - data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, - data->nfs_server.hostname); - data->nfs_server.export_path = export_path; + dentry = mount_subtree(root_mnt, export_path); + nfs_referral_loop_unprotect(); - res = nfs_follow_remote_path(root_mnt, export_path); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", - PTR_ERR_OR_ZERO(res), - IS_ERR(res) ? " [error]" : ""); - return res; + fc->root = dentry; + return 0; } -static struct dentry * -nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) +int nfs4_try_get_tree(struct fs_context *fc) { - struct nfs_mount_info mount_info = { - .fill_super = nfs_fill_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - dprintk("--> nfs4_referral_get_sb()\n"); + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err; - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.cloned == NULL || mount_info.mntfh == NULL) - goto out; + dfprintk(MOUNT, "--> nfs4_try_get_tree()\n"); - /* create a new volume representation */ - server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; + /* We create a mount for the server's root, walk to the requested + * location and then create another mount for that. + */ + err= do_nfs4_mount(nfs4_create_server(fc), + fc, ctx->nfs_server.hostname, + ctx->nfs_server.export_path); + if (err) { + nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err); + } else { + dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); } - - mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4); -out: - nfs_free_fhandle(mount_info.mntfh); - return mntroot; + return err; } /* * Create an NFS4 server record on referral traversal */ -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +int nfs4_get_referral_tree(struct fs_context *fc) { - struct nfs_clone_mount *data = raw_data; - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err; dprintk("--> nfs4_referral_mount()\n"); - export_path = data->mnt_path; - data->mnt_path = "/"; - - root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, - flags, data, data->hostname); - data->mnt_path = export_path; - - res = nfs_follow_remote_path(root_mnt, export_path); - dprintk("<-- nfs4_referral_mount() = %d%s\n", - PTR_ERR_OR_ZERO(res), - IS_ERR(res) ? " [error]" : ""); - return res; + /* create a new volume representation */ + err = do_nfs4_mount(nfs4_create_referral_server(fc), + fc, ctx->nfs_server.hostname, + ctx->nfs_server.export_path); + if (err) { + nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); + } else { + dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); + } + return err; } - static int __init init_nfs_v4(void) { int err; diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index 1a8f376b3f73..d9ac556bebcf 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -24,4 +24,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist); + +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error); #endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index e60b6fbd5ada..1e97e5e04cb4 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -155,6 +155,9 @@ TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); TRACE_DEFINE_ENUM(NFS4ERR_XDEV); +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); + #define show_nfsv4_errors(error) \ __print_symbolic(error, \ { NFS4_OK, "OK" }, \ @@ -305,7 +308,10 @@ TRACE_DEFINE_ENUM(NFS4ERR_XDEV); { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ - { NFS4ERR_XDEV, "XDEV" }) + { NFS4ERR_XDEV, "XDEV" }, \ + /* ***** Internal to Linux NFS client ***** */ \ + { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ + { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) #define show_open_flags(flags) \ __print_flags(flags, "|", \ @@ -352,7 +358,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(dstaddr, clp->cl_hostname); ), @@ -432,7 +438,8 @@ TRACE_EVENT(nfs4_sequence_done, __entry->target_highest_slotid = res->sr_target_highest_slotid; __entry->status_flags = res->sr_status_flags; - __entry->error = res->sr_status; + __entry->error = res->sr_status < 0 ? + -res->sr_status : 0; ), TP_printk( "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " @@ -640,7 +647,7 @@ TRACE_EVENT(nfs4_state_mgr_failed, ), TP_fast_assign( - __entry->error = status; + __entry->error = status < 0 ? -status : 0; __entry->state = clp->cl_state; __assign_str(hostname, clp->cl_hostname); __assign_str(section, section); @@ -659,7 +666,7 @@ TRACE_EVENT(nfs4_xdr_status, TP_PROTO( const struct xdr_stream *xdr, u32 op, - int error + u32 error ), TP_ARGS(xdr, op, error), @@ -691,6 +698,41 @@ TRACE_EVENT(nfs4_xdr_status, ) ); +DECLARE_EVENT_CLASS(nfs4_cb_error_class, + TP_PROTO( + __be32 xid, + u32 cb_ident + ), + + TP_ARGS(xid, cb_ident), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, cbident) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(xid); + __entry->cbident = cb_ident; + ), + + TP_printk( + "xid=0x%08x cb_ident=0x%08x", + __entry->xid, __entry->cbident + ) +); + +#define DEFINE_CB_ERROR_EVENT(name) \ + DEFINE_EVENT(nfs4_cb_error_class, nfs_cb_##name, \ + TP_PROTO( \ + __be32 xid, \ + u32 cb_ident \ + ), \ + TP_ARGS(xid, cb_ident)) + +DEFINE_CB_ERROR_EVENT(no_clp); +DEFINE_CB_ERROR_EVENT(badprinc); + DECLARE_EVENT_CLASS(nfs4_open_event, TP_PROTO( const struct nfs_open_context *ctx, @@ -849,7 +891,7 @@ TRACE_EVENT(nfs4_close, __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->fmode = (__force unsigned int)state->state; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(args->stateid.seqid); __entry->stateid_hash = @@ -914,7 +956,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; __entry->type = request->fl_type; __entry->start = request->fl_start; @@ -986,7 +1028,7 @@ TRACE_EVENT(nfs4_set_lock, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; __entry->type = request->fl_type; __entry->start = request->fl_start; @@ -1164,7 +1206,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, TP_fast_assign( __entry->dev = res->server->s_dev; __entry->fhandle = nfs_fhandle_hash(args->fhandle); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(args->stateid->seqid); __entry->stateid_hash = @@ -1204,7 +1246,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); @@ -1306,7 +1348,7 @@ TRACE_EVENT(nfs4_lookupp, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = NFS_FILEID(inode); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( @@ -1342,7 +1384,7 @@ TRACE_EVENT(nfs4_rename, __entry->dev = olddir->i_sb->s_dev; __entry->olddir = NFS_FILEID(olddir); __entry->newdir = NFS_FILEID(newdir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(oldname, oldname->name); __assign_str(newname, newname->name); ), @@ -1433,7 +1475,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = @@ -1489,7 +1531,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, __entry->valid = fattr->valid; __entry->fhandle = nfs_fhandle_hash(fhandle); __entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( @@ -1536,7 +1578,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->fhandle = nfs_fhandle_hash(fhandle); if (!IS_ERR_OR_NULL(inode)) { __entry->fileid = NFS_FILEID(inode); @@ -1593,7 +1635,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->fhandle = nfs_fhandle_hash(fhandle); if (!IS_ERR_OR_NULL(inode)) { __entry->fileid = NFS_FILEID(inode); @@ -1694,7 +1736,8 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) - __field(size_t, count) + __field(u32, arg_count) + __field(u32, res_count) __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) @@ -1702,13 +1745,18 @@ DECLARE_EVENT_CLASS(nfs4_read_event, TP_fast_assign( const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; const struct nfs4_state *state = hdr->args.context->state; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = hdr->args.offset; - __entry->count = hdr->args.count; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); @@ -1718,14 +1766,14 @@ DECLARE_EVENT_CLASS(nfs4_read_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu stateid=%d:0x%08x", + "offset=%lld count=%u res=%u stateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, - __entry->count, + __entry->arg_count, __entry->res_count, __entry->stateid_seq, __entry->stateid_hash ) ); @@ -1754,7 +1802,8 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) - __field(size_t, count) + __field(u32, arg_count) + __field(u32, res_count) __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) @@ -1762,13 +1811,18 @@ DECLARE_EVENT_CLASS(nfs4_write_event, TP_fast_assign( const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; const struct nfs4_state *state = hdr->args.context->state; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = hdr->args.offset; - __entry->count = hdr->args.count; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); @@ -1778,14 +1832,14 @@ DECLARE_EVENT_CLASS(nfs4_write_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu stateid=%d:0x%08x", + "offset=%lld count=%u res=%u stateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, - __entry->count, + __entry->arg_count, __entry->res_count, __entry->stateid_seq, __entry->stateid_hash ) ); @@ -1814,24 +1868,28 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __field(loff_t, offset) - __field(size_t, count) __field(unsigned long, error) + __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( const struct inode *inode = data->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? + data->args.fh : &nfsi->fh; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = data->args.offset; __entry->count = data->args.count; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu", + "offset=%lld count=%u", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1896,7 +1954,7 @@ TRACE_EVENT(nfs4_layoutget, __entry->iomode = args->iomode; __entry->offset = args->offset; __entry->count = args->length; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = @@ -2094,6 +2152,115 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist); +DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, + TP_PROTO( + const struct nfs_pgio_header *hdr + ), + + TP_ARGS(hdr), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __field(int, stateid_seq) + __field(u32, stateid_hash) + __string(dstaddr, hdr->ds_clp ? + rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + ), + + TP_fast_assign( + const struct inode *inode = hdr->inode; + + __entry->error = hdr->res.op_status; + __entry->fhandle = nfs_fhandle_hash(hdr->args.fh); + __entry->fileid = NFS_FILEID(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; + __entry->stateid_seq = + be32_to_cpu(hdr->args.stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&hdr->args.stateid); + __assign_str(dstaddr, hdr->ds_clp ? + rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown"); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", + -__entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count, + __entry->stateid_seq, __entry->stateid_hash, + __get_str(dstaddr) + ) +); + +#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \ + DEFINE_EVENT(nfs4_flexfiles_io_event, name, \ + TP_PROTO( \ + const struct nfs_pgio_header *hdr \ + ), \ + TP_ARGS(hdr)) +DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error); +DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error); + +TRACE_EVENT(ff_layout_commit_error, + TP_PROTO( + const struct nfs_commit_data *data + ), + + TP_ARGS(data), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __string(dstaddr, data->ds_clp ? + rpc_peeraddr2str(data->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + ), + + TP_fast_assign( + const struct inode *inode = data->inode; + + __entry->error = data->res.op_status; + __entry->fhandle = nfs_fhandle_hash(data->args.fh); + __entry->fileid = NFS_FILEID(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __assign_str(dstaddr, data->ds_clp ? + rpc_peeraddr2str(data->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown"); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%llu count=%u dstaddr=%s", + -__entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count, + __get_str(dstaddr) + ) +); + + #endif /* CONFIG_NFS_V4_1 */ #endif /* _TRACE_NFS4_H */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 728d88b6a698..47817ef0aadb 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1061,7 +1061,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve static __be32 * xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t) { - p = xdr_encode_hyper(p, (__s64)t->tv_sec); + p = xdr_encode_hyper(p, t->tv_sec); *p++ = cpu_to_be32(t->tv_nsec); return p; } @@ -4313,11 +4313,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { + struct nfs_writeverf *verf = res->verf; int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_write_verifier(xdr, &res->verf->verifier); + status = decode_write_verifier(xdr, &verf->verifier); + if (!status) + verf->committed = NFS_FILE_SYNC; return status; } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 2a82dcce5fc1..a9588d19a5ae 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -198,7 +198,66 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_fsync_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); DEFINE_NFS_INODE_EVENT(nfs_access_enter); -DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); + +TRACE_EVENT(nfs_access_exit, + TP_PROTO( + const struct inode *inode, + unsigned int mask, + unsigned int permitted, + int error + ), + + TP_ARGS(inode, mask, permitted, error), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(unsigned char, type) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, size) + __field(unsigned long, nfsi_flags) + __field(unsigned long, cache_validity) + __field(unsigned int, mask) + __field(unsigned int, permitted) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + __entry->error = error < 0 ? -error : 0; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->type = nfs_umode_to_dtype(inode->i_mode); + __entry->version = inode_peek_iversion_raw(inode); + __entry->size = i_size_read(inode); + __entry->nfsi_flags = nfsi->flags; + __entry->cache_validity = nfsi->cache_validity; + __entry->mask = mask; + __entry->permitted = permitted; + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "type=%u (%s) version=%llu size=%lld " + "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) " + "mask=0x%x permitted=0x%x", + -__entry->error, nfs_show_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->type, + nfs_show_file_type(__entry->type), + (unsigned long long)__entry->version, + (long long)__entry->size, + __entry->cache_validity, + nfs_show_cache_validity(__entry->cache_validity), + __entry->nfsi_flags, + nfs_show_nfsi_flags(__entry->nfsi_flags), + __entry->mask, __entry->permitted + ) +); TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); @@ -818,75 +877,85 @@ TRACE_EVENT(nfs_sillyrename_unlink, TRACE_EVENT(nfs_initiate_read, TP_PROTO( - const struct inode *inode, - loff_t offset, unsigned long count + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, offset, count), + TP_ARGS(hdr), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; - __entry->offset = offset; - __entry->count = count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu", + "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count + (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_readpage_done, TP_PROTO( - const struct inode *inode, - int status, loff_t offset, bool eof + const struct rpc_task *task, + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, status, offset, eof), + TP_ARGS(task, hdr), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(bool, eof) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(bool, eof) + __field(int, status) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); - - __entry->status = status; - __entry->offset = offset; - __entry->eof = eof; + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; + + __entry->status = task->tk_status; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->eof = hdr->res.eof; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d%s", + "offset=%lld count=%u res=%u status=%d%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, + (long long)__entry->offset, __entry->arg_count, + __entry->res_count, __entry->status, __entry->eof ? " eof" : "" ) ); @@ -903,90 +972,144 @@ TRACE_DEFINE_ENUM(NFS_FILE_SYNC); TRACE_EVENT(nfs_initiate_write, TP_PROTO( - const struct inode *inode, - loff_t offset, unsigned long count, - enum nfs3_stable_how stable + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, offset, count, stable), + TP_ARGS(hdr), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) - __field(enum nfs3_stable_how, stable) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __field(enum nfs3_stable_how, stable) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; - __entry->offset = offset; - __entry->count = count; - __entry->stable = stable; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; + __entry->stable = hdr->args.stable; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu stable=%s", + "offset=%lld count=%u stable=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count, + (long long)__entry->offset, __entry->count, nfs_show_stable(__entry->stable) ) ); TRACE_EVENT(nfs_writeback_done, TP_PROTO( - const struct inode *inode, - int status, - loff_t offset, - struct nfs_writeverf *writeverf + const struct rpc_task *task, + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, status, offset, writeverf), + TP_ARGS(task, hdr), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(enum nfs3_stable_how, stable) - __field(unsigned long long, verifier) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(int, status) + __field(enum nfs3_stable_how, stable) + __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); - - __entry->status = status; - __entry->offset = offset; - __entry->stable = writeverf->committed; - memcpy(&__entry->verifier, &writeverf->verifier, - sizeof(__entry->verifier)); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; + const struct nfs_writeverf *verf = hdr->res.verf; + + __entry->status = task->tk_status; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->stable = verf->committed; + memcpy(__entry->verifier, + &verf->verifier, + NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d stable=%s " - "verifier 0x%016llx", + "offset=%lld count=%u res=%u status=%d stable=%s " + "verifier=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, + (long long)__entry->offset, __entry->arg_count, + __entry->res_count, __entry->status, nfs_show_stable(__entry->stable), - __entry->verifier + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) ) ); +DECLARE_EVENT_CLASS(nfs_page_error_class, + TP_PROTO( + const struct nfs_page *req, + int error + ), + + TP_ARGS(req, error), + + TP_STRUCT__entry( + __field(const void *, req) + __field(pgoff_t, index) + __field(unsigned int, offset) + __field(unsigned int, pgbase) + __field(unsigned int, bytes) + __field(int, error) + ), + + TP_fast_assign( + __entry->req = req; + __entry->index = req->wb_index; + __entry->offset = req->wb_offset; + __entry->pgbase = req->wb_pgbase; + __entry->bytes = req->wb_bytes; + __entry->error = error; + ), + + TP_printk( + "req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d", + __entry->req, __entry->index, __entry->offset, + __entry->pgbase, __entry->bytes, __entry->error + ) +); + +#define DEFINE_NFS_PAGEERR_EVENT(name) \ + DEFINE_EVENT(nfs_page_error_class, name, \ + TP_PROTO( \ + const struct nfs_page *req, \ + int error \ + ), \ + TP_ARGS(req, error)) + +DEFINE_NFS_PAGEERR_EVENT(nfs_write_error); +DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error); +DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error); + TRACE_EVENT(nfs_initiate_commit, TP_PROTO( const struct nfs_commit_data *data @@ -995,71 +1118,81 @@ TRACE_EVENT(nfs_initiate_commit, TP_ARGS(data), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? + data->args.fh : &nfsi->fh; __entry->offset = data->args.offset; __entry->count = data->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu", + "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count + (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_commit_done, TP_PROTO( + const struct rpc_task *task, const struct nfs_commit_data *data ), - TP_ARGS(data), + TP_ARGS(task, data), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(unsigned long long, verifier) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(int, status) + __field(enum nfs3_stable_how, stable) + __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? + data->args.fh : &nfsi->fh; + const struct nfs_writeverf *verf = data->res.verf; - __entry->status = data->res.op_status; + __entry->status = task->tk_status; __entry->offset = data->args.offset; - memcpy(&__entry->verifier, &data->verf.verifier, - sizeof(__entry->verifier)); + __entry->stable = verf->committed; + memcpy(__entry->verifier, + &verf->verifier, + NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d verifier 0x%016llx", + "offset=%lld status=%d stable=%s verifier=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, - __entry->verifier + (long long)__entry->offset, __entry->status, + nfs_show_stable(__entry->stable), + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) ) ); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cec3070ab577..542ea8dfd1bc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1425,7 +1425,7 @@ retry: /* lo ref dropped in pnfs_roc_release() */ layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); /* If the creds don't match, we can't compound the layoutreturn */ - if (!layoutreturn || cred != lo->plh_lc_cred) + if (!layoutreturn || cred_fscmp(cred, lo->plh_lc_cred) != 0) goto out_noroc; roc = layoutreturn; @@ -1998,8 +1998,6 @@ lookup_again: trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); - if (status != -EAGAIN) - goto out_unlock; spin_unlock(&ino->i_lock); nfs4_schedule_stateid_recovery(server, ctx->state); pnfs_clear_first_layoutget(lo); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f8a38065c7e4..0fafdadc9c8d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -79,6 +79,10 @@ enum pnfs_try_status { PNFS_TRY_AGAIN = 2, }; +/* error codes for internal use */ +#define NFS4ERR_RESET_TO_MDS 12001 +#define NFS4ERR_RESET_TO_PNFS 12002 + #ifdef CONFIG_NFS_V4_1 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" @@ -91,10 +95,6 @@ enum pnfs_try_status { #define NFS4_DEF_DS_RETRANS 5 #define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) -/* error codes for internal use */ -#define NFS4ERR_RESET_TO_MDS 12001 -#define NFS4ERR_RESET_TO_PNFS 12002 - enum { NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 82af4809b869..8b37e7f8e789 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -31,12 +31,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); /* Fake up some data that will cause nfs_commit_release to retry the writes. */ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) { - struct nfs_page *first = nfs_list_entry(data->pages.next); + struct nfs_writeverf *verf = data->res.verf; data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ + memset(&verf->verifier, 0, sizeof(verf->verifier)); + verf->committed = NFS_UNSTABLE; } EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0f7288b94633..15c865cc837f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -108,10 +108,15 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = fattr, }; int status; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, task_flags); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -147,14 +152,14 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs_proc_lookup(struct inode *dir, const struct qstr *name, +nfs_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len + .name = dentry->d_name.name, + .len = dentry->d_name.len }; struct nfs_diropok res = { .fh = fhandle, @@ -166,10 +171,15 @@ nfs_proc_lookup(struct inode *dir, const struct qstr *name, .rpc_resp = &res, }; int status; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; - dprintk("NFS call lookup %s\n", name->name); + dprintk("NFS call lookup %pd2\n", dentry); nfs_fattr_init(fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -710,7 +720,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, .submount = nfs_submount, - .try_mount = nfs_try_mount, + .try_get_tree = nfs_try_get_tree, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index cfe0b586eadd..34bb9add2302 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -214,7 +214,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr, task_setup_data->flags |= swap_flags; rpc_ops->read_setup(hdr, msg); - trace_nfs_initiate_read(inode, hdr->io_start, hdr->good_bytes); + trace_nfs_initiate_read(hdr); } static void @@ -247,8 +247,7 @@ static int nfs_readpage_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); - trace_nfs_readpage_done(inode, task->tk_status, - hdr->args.offset, hdr->res.eof); + trace_nfs_readpage_done(task, hdr); if (task->tk_status == -ESTALE) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); @@ -282,6 +281,8 @@ static void nfs_readpage_retry(struct rpc_task *task, argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; + resp->count = 0; + resp->eof = 0; rpc_restart_call_prepare(task); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8d8d04bb9d64..dada09b391c6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -69,250 +69,6 @@ #include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS -#define NFS_TEXT_DATA 1 - -#if IS_ENABLED(CONFIG_NFS_V3) -#define NFS_DEFAULT_VERSION 3 -#else -#define NFS_DEFAULT_VERSION 2 -#endif - -#define NFS_MAX_CONNECTIONS 16 - -enum { - /* Mount options that take no arguments */ - Opt_soft, Opt_softerr, Opt_hard, - Opt_posix, Opt_noposix, - Opt_cto, Opt_nocto, - Opt_ac, Opt_noac, - Opt_lock, Opt_nolock, - Opt_udp, Opt_tcp, Opt_rdma, - Opt_acl, Opt_noacl, - Opt_rdirplus, Opt_nordirplus, - Opt_sharecache, Opt_nosharecache, - Opt_resvport, Opt_noresvport, - Opt_fscache, Opt_nofscache, - Opt_migration, Opt_nomigration, - - /* Mount options that take integer arguments */ - Opt_port, - Opt_rsize, Opt_wsize, Opt_bsize, - Opt_timeo, Opt_retrans, - Opt_acregmin, Opt_acregmax, - Opt_acdirmin, Opt_acdirmax, - Opt_actimeo, - Opt_namelen, - Opt_mountport, - Opt_mountvers, - Opt_minorversion, - - /* Mount options that take string arguments */ - Opt_nfsvers, - Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, - Opt_addr, Opt_mountaddr, Opt_clientaddr, - Opt_nconnect, - Opt_lookupcache, - Opt_fscache_uniq, - Opt_local_lock, - - /* Special mount options */ - Opt_userspace, Opt_deprecated, Opt_sloppy, - - Opt_err -}; - -static const match_table_t nfs_mount_option_tokens = { - { Opt_userspace, "bg" }, - { Opt_userspace, "fg" }, - { Opt_userspace, "retry=%s" }, - - { Opt_sloppy, "sloppy" }, - - { Opt_soft, "soft" }, - { Opt_softerr, "softerr" }, - { Opt_hard, "hard" }, - { Opt_deprecated, "intr" }, - { Opt_deprecated, "nointr" }, - { Opt_posix, "posix" }, - { Opt_noposix, "noposix" }, - { Opt_cto, "cto" }, - { Opt_nocto, "nocto" }, - { Opt_ac, "ac" }, - { Opt_noac, "noac" }, - { Opt_lock, "lock" }, - { Opt_nolock, "nolock" }, - { Opt_udp, "udp" }, - { Opt_tcp, "tcp" }, - { Opt_rdma, "rdma" }, - { Opt_acl, "acl" }, - { Opt_noacl, "noacl" }, - { Opt_rdirplus, "rdirplus" }, - { Opt_nordirplus, "nordirplus" }, - { Opt_sharecache, "sharecache" }, - { Opt_nosharecache, "nosharecache" }, - { Opt_resvport, "resvport" }, - { Opt_noresvport, "noresvport" }, - { Opt_fscache, "fsc" }, - { Opt_nofscache, "nofsc" }, - { Opt_migration, "migration" }, - { Opt_nomigration, "nomigration" }, - - { Opt_port, "port=%s" }, - { Opt_rsize, "rsize=%s" }, - { Opt_wsize, "wsize=%s" }, - { Opt_bsize, "bsize=%s" }, - { Opt_timeo, "timeo=%s" }, - { Opt_retrans, "retrans=%s" }, - { Opt_acregmin, "acregmin=%s" }, - { Opt_acregmax, "acregmax=%s" }, - { Opt_acdirmin, "acdirmin=%s" }, - { Opt_acdirmax, "acdirmax=%s" }, - { Opt_actimeo, "actimeo=%s" }, - { Opt_namelen, "namlen=%s" }, - { Opt_mountport, "mountport=%s" }, - { Opt_mountvers, "mountvers=%s" }, - { Opt_minorversion, "minorversion=%s" }, - - { Opt_nfsvers, "nfsvers=%s" }, - { Opt_nfsvers, "vers=%s" }, - - { Opt_sec, "sec=%s" }, - { Opt_proto, "proto=%s" }, - { Opt_mountproto, "mountproto=%s" }, - { Opt_addr, "addr=%s" }, - { Opt_clientaddr, "clientaddr=%s" }, - { Opt_mounthost, "mounthost=%s" }, - { Opt_mountaddr, "mountaddr=%s" }, - - { Opt_nconnect, "nconnect=%s" }, - - { Opt_lookupcache, "lookupcache=%s" }, - { Opt_fscache_uniq, "fsc=%s" }, - { Opt_local_lock, "local_lock=%s" }, - - /* The following needs to be listed after all other options */ - { Opt_nfsvers, "v%s" }, - - { Opt_err, NULL } -}; - -enum { - Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, - Opt_xprt_rdma6, - - Opt_xprt_err -}; - -static const match_table_t nfs_xprt_protocol_tokens = { - { Opt_xprt_udp, "udp" }, - { Opt_xprt_udp6, "udp6" }, - { Opt_xprt_tcp, "tcp" }, - { Opt_xprt_tcp6, "tcp6" }, - { Opt_xprt_rdma, "rdma" }, - { Opt_xprt_rdma6, "rdma6" }, - - { Opt_xprt_err, NULL } -}; - -enum { - Opt_sec_none, Opt_sec_sys, - Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, - Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, - Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, - - Opt_sec_err -}; - -static const match_table_t nfs_secflavor_tokens = { - { Opt_sec_none, "none" }, - { Opt_sec_none, "null" }, - { Opt_sec_sys, "sys" }, - - { Opt_sec_krb5, "krb5" }, - { Opt_sec_krb5i, "krb5i" }, - { Opt_sec_krb5p, "krb5p" }, - - { Opt_sec_lkey, "lkey" }, - { Opt_sec_lkeyi, "lkeyi" }, - { Opt_sec_lkeyp, "lkeyp" }, - - { Opt_sec_spkm, "spkm3" }, - { Opt_sec_spkmi, "spkm3i" }, - { Opt_sec_spkmp, "spkm3p" }, - - { Opt_sec_err, NULL } -}; - -enum { - Opt_lookupcache_all, Opt_lookupcache_positive, - Opt_lookupcache_none, - - Opt_lookupcache_err -}; - -static match_table_t nfs_lookupcache_tokens = { - { Opt_lookupcache_all, "all" }, - { Opt_lookupcache_positive, "pos" }, - { Opt_lookupcache_positive, "positive" }, - { Opt_lookupcache_none, "none" }, - - { Opt_lookupcache_err, NULL } -}; - -enum { - Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix, - Opt_local_lock_none, - - Opt_local_lock_err -}; - -static match_table_t nfs_local_lock_tokens = { - { Opt_local_lock_all, "all" }, - { Opt_local_lock_flock, "flock" }, - { Opt_local_lock_posix, "posix" }, - { Opt_local_lock_none, "none" }, - - { Opt_local_lock_err, NULL } -}; - -enum { - Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, - Opt_vers_4_1, Opt_vers_4_2, - - Opt_vers_err -}; - -static match_table_t nfs_vers_tokens = { - { Opt_vers_2, "2" }, - { Opt_vers_3, "3" }, - { Opt_vers_4, "4" }, - { Opt_vers_4_0, "4.0" }, - { Opt_vers_4_1, "4.1" }, - { Opt_vers_4_2, "4.2" }, - - { Opt_vers_err, NULL } -}; - -static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); - -struct file_system_type nfs_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; -MODULE_ALIAS_FS("nfs"); -EXPORT_SYMBOL_GPL(nfs_fs_type); - -struct file_system_type nfs_xdev_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .mount = nfs_xdev_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, @@ -326,26 +82,10 @@ const struct super_operations nfs_sops = { .show_devname = nfs_show_devname, .show_path = nfs_show_path, .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, }; EXPORT_SYMBOL_GPL(nfs_sops); #if IS_ENABLED(CONFIG_NFS_V4) -static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); -static int nfs4_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, const char *dev_name); - -struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; -MODULE_ALIAS_FS("nfs4"); -MODULE_ALIAS("nfs4"); -EXPORT_SYMBOL_GPL(nfs4_fs_type); - static int __init register_nfs4_fs(void) { return register_filesystem(&nfs4_fs_type); @@ -635,6 +375,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", "" }, { NFS_MOUNT_SOFTERR, ",softerr", "" }, + { NFS_MOUNT_SOFTREVAL, ",softreval", "" }, { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, @@ -931,141 +672,6 @@ void nfs_umount_begin(struct super_block *sb) } EXPORT_SYMBOL_GPL(nfs_umount_begin); -static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) -{ - struct nfs_parsed_mount_data *data; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (data) { - data->timeo = NFS_UNSPEC_TIMEO; - data->retrans = NFS_UNSPEC_RETRANS; - data->acregmin = NFS_DEF_ACREGMIN; - data->acregmax = NFS_DEF_ACREGMAX; - data->acdirmin = NFS_DEF_ACDIRMIN; - data->acdirmax = NFS_DEF_ACDIRMAX; - data->mount_server.port = NFS_UNSPEC_PORT; - data->nfs_server.port = NFS_UNSPEC_PORT; - data->nfs_server.protocol = XPRT_TRANSPORT_TCP; - data->selected_flavor = RPC_AUTH_MAXFLAVOR; - data->minorversion = 0; - data->need_mount = true; - data->net = current->nsproxy->net_ns; - data->lsm_opts = NULL; - } - return data; -} - -static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) -{ - if (data) { - kfree(data->client_address); - kfree(data->mount_server.hostname); - kfree(data->nfs_server.export_path); - kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); - security_free_mnt_opts(&data->lsm_opts); - kfree(data); - } -} - -/* - * Sanity-check a server address provided by the mount command. - * - * Address family must be initialized, and address must not be - * the ANY address for that family. - */ -static int nfs_verify_server_address(struct sockaddr *addr) -{ - switch (addr->sa_family) { - case AF_INET: { - struct sockaddr_in *sa = (struct sockaddr_in *)addr; - return sa->sin_addr.s_addr != htonl(INADDR_ANY); - } - case AF_INET6: { - struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; - return !ipv6_addr_any(sa); - } - } - - dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); - return 0; -} - -/* - * Select between a default port value and a user-specified port value. - * If a zero value is set, then autobind will be used. - */ -static void nfs_set_port(struct sockaddr *sap, int *port, - const unsigned short default_port) -{ - if (*port == NFS_UNSPEC_PORT) - *port = default_port; - - rpc_set_port(sap, *port); -} - -/* - * Sanity check the NFS transport protocol. - * - */ -static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) -{ - switch (mnt->nfs_server.protocol) { - case XPRT_TRANSPORT_UDP: - case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_RDMA: - break; - default: - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - } -} - -/* - * For text based NFSv2/v3 mounts, the mount protocol transport default - * settings should depend upon the specified NFS transport. - */ -static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) -{ - nfs_validate_transport_protocol(mnt); - - if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || - mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) - return; - switch (mnt->nfs_server.protocol) { - case XPRT_TRANSPORT_UDP: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; - break; - case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_RDMA: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; - } -} - -/* - * Add 'flavor' to 'auth_info' if not already present. - * Returns true if 'flavor' ends up in the list, false otherwise - */ -static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, - rpc_authflavor_t flavor) -{ - unsigned int i; - unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); - - /* make sure this flavor isn't already in the list */ - for (i = 0; i < auth_info->flavor_len; i++) { - if (flavor == auth_info->flavors[i]) - return true; - } - - if (auth_info->flavor_len + 1 >= max_flavor_len) { - dfprintk(MOUNT, "NFS: too many sec= flavors\n"); - return false; - } - - auth_info->flavors[auth_info->flavor_len++] = flavor; - return true; -} - /* * Return true if 'match' is in auth_info or auth_info is empty. * Return false otherwise. @@ -1087,633 +693,13 @@ bool nfs_auth_info_match(const struct nfs_auth_info *auth_info, EXPORT_SYMBOL_GPL(nfs_auth_info_match); /* - * Parse the value of the 'sec=' option. - */ -static int nfs_parse_security_flavors(char *value, - struct nfs_parsed_mount_data *mnt) -{ - substring_t args[MAX_OPT_ARGS]; - rpc_authflavor_t pseudoflavor; - char *p; - - dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); - - while ((p = strsep(&value, ":")) != NULL) { - switch (match_token(p, nfs_secflavor_tokens, args)) { - case Opt_sec_none: - pseudoflavor = RPC_AUTH_NULL; - break; - case Opt_sec_sys: - pseudoflavor = RPC_AUTH_UNIX; - break; - case Opt_sec_krb5: - pseudoflavor = RPC_AUTH_GSS_KRB5; - break; - case Opt_sec_krb5i: - pseudoflavor = RPC_AUTH_GSS_KRB5I; - break; - case Opt_sec_krb5p: - pseudoflavor = RPC_AUTH_GSS_KRB5P; - break; - case Opt_sec_lkey: - pseudoflavor = RPC_AUTH_GSS_LKEY; - break; - case Opt_sec_lkeyi: - pseudoflavor = RPC_AUTH_GSS_LKEYI; - break; - case Opt_sec_lkeyp: - pseudoflavor = RPC_AUTH_GSS_LKEYP; - break; - case Opt_sec_spkm: - pseudoflavor = RPC_AUTH_GSS_SPKM; - break; - case Opt_sec_spkmi: - pseudoflavor = RPC_AUTH_GSS_SPKMI; - break; - case Opt_sec_spkmp: - pseudoflavor = RPC_AUTH_GSS_SPKMP; - break; - default: - dfprintk(MOUNT, - "NFS: sec= option '%s' not recognized\n", p); - return 0; - } - - if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor)) - return 0; - } - - return 1; -} - -static int nfs_parse_version_string(char *string, - struct nfs_parsed_mount_data *mnt, - substring_t *args) -{ - mnt->flags &= ~NFS_MOUNT_VER3; - switch (match_token(string, nfs_vers_tokens, args)) { - case Opt_vers_2: - mnt->version = 2; - break; - case Opt_vers_3: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; - break; - case Opt_vers_4: - /* Backward compatibility option. In future, - * the mount program should always supply - * a NFSv4 minor version number. - */ - mnt->version = 4; - break; - case Opt_vers_4_0: - mnt->version = 4; - mnt->minorversion = 0; - break; - case Opt_vers_4_1: - mnt->version = 4; - mnt->minorversion = 1; - break; - case Opt_vers_4_2: - mnt->version = 4; - mnt->minorversion = 2; - break; - default: - return 0; - } - return 1; -} - -static int nfs_get_option_str(substring_t args[], char **option) -{ - kfree(*option); - *option = match_strdup(args); - return !*option; -} - -static int nfs_get_option_ul(substring_t args[], unsigned long *option) -{ - int rc; - char *string; - - string = match_strdup(args); - if (string == NULL) - return -ENOMEM; - rc = kstrtoul(string, 10, option); - kfree(string); - - return rc; -} - -static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, - unsigned long l_bound, unsigned long u_bound) -{ - int ret; - - ret = nfs_get_option_ul(args, option); - if (ret != 0) - return ret; - if (*option < l_bound || *option > u_bound) - return -ERANGE; - return 0; -} - -/* - * Error-check and convert a string of mount options from user space into - * a data structure. The whole mount string is processed; bad options are - * skipped as they are encountered. If there were no errors, return 1; - * otherwise return 0 (zero). - */ -static int nfs_parse_mount_options(char *raw, - struct nfs_parsed_mount_data *mnt) -{ - char *p, *string; - int rc, sloppy = 0, invalid_option = 0; - unsigned short protofamily = AF_UNSPEC; - unsigned short mountfamily = AF_UNSPEC; - - if (!raw) { - dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); - return 1; - } - dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - - rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts); - if (rc) - goto out_security_failure; - - while ((p = strsep(&raw, ",")) != NULL) { - substring_t args[MAX_OPT_ARGS]; - unsigned long option; - int token; - - if (!*p) - continue; - - dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); - - token = match_token(p, nfs_mount_option_tokens, args); - switch (token) { - - /* - * boolean options: foo/nofoo - */ - case Opt_soft: - mnt->flags |= NFS_MOUNT_SOFT; - mnt->flags &= ~NFS_MOUNT_SOFTERR; - break; - case Opt_softerr: - mnt->flags |= NFS_MOUNT_SOFTERR; - mnt->flags &= ~NFS_MOUNT_SOFT; - break; - case Opt_hard: - mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); - break; - case Opt_posix: - mnt->flags |= NFS_MOUNT_POSIX; - break; - case Opt_noposix: - mnt->flags &= ~NFS_MOUNT_POSIX; - break; - case Opt_cto: - mnt->flags &= ~NFS_MOUNT_NOCTO; - break; - case Opt_nocto: - mnt->flags |= NFS_MOUNT_NOCTO; - break; - case Opt_ac: - mnt->flags &= ~NFS_MOUNT_NOAC; - break; - case Opt_noac: - mnt->flags |= NFS_MOUNT_NOAC; - break; - case Opt_lock: - mnt->flags &= ~NFS_MOUNT_NONLM; - mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_nolock: - mnt->flags |= NFS_MOUNT_NONLM; - mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_udp: - mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_tcp: - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_rdma: - mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(p); - break; - case Opt_acl: - mnt->flags &= ~NFS_MOUNT_NOACL; - break; - case Opt_noacl: - mnt->flags |= NFS_MOUNT_NOACL; - break; - case Opt_rdirplus: - mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; - break; - case Opt_nordirplus: - mnt->flags |= NFS_MOUNT_NORDIRPLUS; - break; - case Opt_sharecache: - mnt->flags &= ~NFS_MOUNT_UNSHARED; - break; - case Opt_nosharecache: - mnt->flags |= NFS_MOUNT_UNSHARED; - break; - case Opt_resvport: - mnt->flags &= ~NFS_MOUNT_NORESVPORT; - break; - case Opt_noresvport: - mnt->flags |= NFS_MOUNT_NORESVPORT; - break; - case Opt_fscache: - mnt->options |= NFS_OPTION_FSCACHE; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = NULL; - break; - case Opt_nofscache: - mnt->options &= ~NFS_OPTION_FSCACHE; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = NULL; - break; - case Opt_migration: - mnt->options |= NFS_OPTION_MIGRATION; - break; - case Opt_nomigration: - mnt->options &= ~NFS_OPTION_MIGRATION; - break; - - /* - * options that take numeric values - */ - case Opt_port: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) - goto out_invalid_value; - mnt->nfs_server.port = option; - break; - case Opt_rsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->rsize = option; - break; - case Opt_wsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->wsize = option; - break; - case Opt_bsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->bsize = option; - break; - case Opt_timeo: - if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) - goto out_invalid_value; - mnt->timeo = option; - break; - case Opt_retrans: - if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) - goto out_invalid_value; - mnt->retrans = option; - break; - case Opt_acregmin: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acregmin = option; - break; - case Opt_acregmax: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acregmax = option; - break; - case Opt_acdirmin: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acdirmin = option; - break; - case Opt_acdirmax: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acdirmax = option; - break; - case Opt_actimeo: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acregmin = mnt->acregmax = - mnt->acdirmin = mnt->acdirmax = option; - break; - case Opt_namelen: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->namlen = option; - break; - case Opt_mountport: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) - goto out_invalid_value; - mnt->mount_server.port = option; - break; - case Opt_mountvers: - if (nfs_get_option_ul(args, &option) || - option < NFS_MNT_VERSION || - option > NFS_MNT3_VERSION) - goto out_invalid_value; - mnt->mount_server.version = option; - break; - case Opt_minorversion: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - if (option > NFS4_MAX_MINOR_VERSION) - goto out_invalid_value; - mnt->minorversion = option; - break; - - /* - * options that take text values - */ - case Opt_nfsvers: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_version_string(string, mnt, args); - kfree(string); - if (!rc) - goto out_invalid_value; - break; - case Opt_sec: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_security_flavors(string, mnt); - kfree(string); - if (!rc) { - dfprintk(MOUNT, "NFS: unrecognized " - "security flavor\n"); - return 0; - } - break; - case Opt_proto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); - - protofamily = AF_INET; - switch (token) { - case Opt_xprt_udp6: - protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_udp: - mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_xprt_tcp6: - protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_tcp: - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_xprt_rdma6: - protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_rdma: - /* vector side protocols to TCP */ - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(string); - break; - default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); - kfree(string); - return 0; - } - kfree(string); - break; - case Opt_mountproto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); - kfree(string); - - mountfamily = AF_INET; - switch (token) { - case Opt_xprt_udp6: - mountfamily = AF_INET6; - /* fall through */ - case Opt_xprt_udp: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_xprt_tcp6: - mountfamily = AF_INET6; - /* fall through */ - case Opt_xprt_tcp: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_xprt_rdma: /* not used for side protocols */ - default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); - return 0; - } - break; - case Opt_addr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - mnt->nfs_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), - (struct sockaddr *) - &mnt->nfs_server.address, - sizeof(mnt->nfs_server.address)); - kfree(string); - if (mnt->nfs_server.addrlen == 0) - goto out_invalid_address; - break; - case Opt_clientaddr: - if (nfs_get_option_str(args, &mnt->client_address)) - goto out_nomem; - break; - case Opt_mounthost: - if (nfs_get_option_str(args, - &mnt->mount_server.hostname)) - goto out_nomem; - break; - case Opt_mountaddr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - mnt->mount_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), - (struct sockaddr *) - &mnt->mount_server.address, - sizeof(mnt->mount_server.address)); - kfree(string); - if (mnt->mount_server.addrlen == 0) - goto out_invalid_address; - break; - case Opt_nconnect: - if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) - goto out_invalid_value; - mnt->nfs_server.nconnect = option; - break; - case Opt_lookupcache: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_lookupcache_tokens, args); - kfree(string); - switch (token) { - case Opt_lookupcache_all: - mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); - break; - case Opt_lookupcache_positive: - mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; - mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; - break; - case Opt_lookupcache_none: - mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; - break; - default: - dfprintk(MOUNT, "NFS: invalid " - "lookupcache argument\n"); - return 0; - } - break; - case Opt_fscache_uniq: - if (nfs_get_option_str(args, &mnt->fscache_uniq)) - goto out_nomem; - mnt->options |= NFS_OPTION_FSCACHE; - break; - case Opt_local_lock: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, nfs_local_lock_tokens, - args); - kfree(string); - switch (token) { - case Opt_local_lock_all: - mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_local_lock_flock: - mnt->flags |= NFS_MOUNT_LOCAL_FLOCK; - break; - case Opt_local_lock_posix: - mnt->flags |= NFS_MOUNT_LOCAL_FCNTL; - break; - case Opt_local_lock_none: - mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - default: - dfprintk(MOUNT, "NFS: invalid " - "local_lock argument\n"); - return 0; - } - break; - - /* - * Special options - */ - case Opt_sloppy: - sloppy = 1; - dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); - break; - case Opt_userspace: - case Opt_deprecated: - dfprintk(MOUNT, "NFS: ignoring mount option " - "'%s'\n", p); - break; - - default: - invalid_option = 1; - dfprintk(MOUNT, "NFS: unrecognized mount option " - "'%s'\n", p); - } - } - - if (!sloppy && invalid_option) - return 0; - - if (mnt->minorversion && mnt->version != 4) - goto out_minorversion_mismatch; - - if (mnt->options & NFS_OPTION_MIGRATION && - (mnt->version != 4 || mnt->minorversion != 0)) - goto out_migration_misuse; - - /* - * verify that any proto=/mountproto= options match the address - * families in the addr=/mountaddr= options. - */ - if (protofamily != AF_UNSPEC && - protofamily != mnt->nfs_server.address.ss_family) - goto out_proto_mismatch; - - if (mountfamily != AF_UNSPEC) { - if (mnt->mount_server.addrlen) { - if (mountfamily != mnt->mount_server.address.ss_family) - goto out_mountproto_mismatch; - } else { - if (mountfamily != mnt->nfs_server.address.ss_family) - goto out_mountproto_mismatch; - } - } - - return 1; - -out_mountproto_mismatch: - printk(KERN_INFO "NFS: mount server address does not match mountproto= " - "option\n"); - return 0; -out_proto_mismatch: - printk(KERN_INFO "NFS: server address does not match proto= option\n"); - return 0; -out_invalid_address: - printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); - return 0; -out_invalid_value: - printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); - return 0; -out_minorversion_mismatch: - printk(KERN_INFO "NFS: mount option vers=%u does not support " - "minorversion=%u\n", mnt->version, mnt->minorversion); - return 0; -out_migration_misuse: - printk(KERN_INFO - "NFS: 'migration' not supported for this NFS version\n"); - return 0; -out_nomem: - printk(KERN_INFO "NFS: not enough memory to parse option\n"); - return 0; -out_security_failure: - printk(KERN_INFO "NFS: security options invalid: %d\n", rc); - return 0; -} - -/* - * Ensure that a specified authtype in args->auth_info is supported by - * the server. Returns 0 and sets args->selected_flavor if it's ok, and + * Ensure that a specified authtype in ctx->auth_info is supported by + * the server. Returns 0 and sets ctx->selected_flavor if it's ok, and * -EACCES if not. */ -static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, - rpc_authflavor_t *server_authlist, unsigned int count) +static int nfs_verify_authflavors(struct nfs_fs_context *ctx, + rpc_authflavor_t *server_authlist, + unsigned int count) { rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; bool found_auth_null = false; @@ -1734,7 +720,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, for (i = 0; i < count; i++) { flavor = server_authlist[i]; - if (nfs_auth_info_match(&args->auth_info, flavor)) + if (nfs_auth_info_match(&ctx->auth_info, flavor)) goto out; if (flavor == RPC_AUTH_NULL) @@ -1742,7 +728,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, } if (found_auth_null) { - flavor = args->auth_info.flavors[0]; + flavor = ctx->auth_info.flavors[0]; goto out; } @@ -1751,8 +737,8 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, return -EACCES; out: - args->selected_flavor = flavor; - dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor); + ctx->selected_flavor = flavor; + dfprintk(MOUNT, "NFS: using auth flavor %u\n", ctx->selected_flavor); return 0; } @@ -1760,50 +746,51 @@ out: * Use the remote server's MOUNT service to request the NFS file handle * corresponding to the provided path. */ -static int nfs_request_mount(struct nfs_parsed_mount_data *args, +static int nfs_request_mount(struct fs_context *fc, struct nfs_fh *root_fh, rpc_authflavor_t *server_authlist, unsigned int *server_authlist_len) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_mount_request request = { .sap = (struct sockaddr *) - &args->mount_server.address, - .dirpath = args->nfs_server.export_path, - .protocol = args->mount_server.protocol, + &ctx->mount_server.address, + .dirpath = ctx->nfs_server.export_path, + .protocol = ctx->mount_server.protocol, .fh = root_fh, - .noresvport = args->flags & NFS_MOUNT_NORESVPORT, + .noresvport = ctx->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = server_authlist_len, .auth_flavs = server_authlist, - .net = args->net, + .net = fc->net_ns, }; int status; - if (args->mount_server.version == 0) { - switch (args->version) { + if (ctx->mount_server.version == 0) { + switch (ctx->version) { default: - args->mount_server.version = NFS_MNT3_VERSION; + ctx->mount_server.version = NFS_MNT3_VERSION; break; case 2: - args->mount_server.version = NFS_MNT_VERSION; + ctx->mount_server.version = NFS_MNT_VERSION; } } - request.version = args->mount_server.version; + request.version = ctx->mount_server.version; - if (args->mount_server.hostname) - request.hostname = args->mount_server.hostname; + if (ctx->mount_server.hostname) + request.hostname = ctx->mount_server.hostname; else - request.hostname = args->nfs_server.hostname; + request.hostname = ctx->nfs_server.hostname; /* * Construct the mount server's address. */ - if (args->mount_server.address.ss_family == AF_UNSPEC) { - memcpy(request.sap, &args->nfs_server.address, - args->nfs_server.addrlen); - args->mount_server.addrlen = args->nfs_server.addrlen; + if (ctx->mount_server.address.sa_family == AF_UNSPEC) { + memcpy(request.sap, &ctx->nfs_server.address, + ctx->nfs_server.addrlen); + ctx->mount_server.addrlen = ctx->nfs_server.addrlen; } - request.salen = args->mount_server.addrlen; - nfs_set_port(request.sap, &args->mount_server.port, 0); + request.salen = ctx->mount_server.addrlen; + nfs_set_port(request.sap, &ctx->mount_server.port, 0); /* * Now ask the mount server to map our export path @@ -1819,20 +806,18 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return 0; } -static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); int status; unsigned int i; bool tried_auth_unix = false; bool auth_null_in_list = false; struct nfs_server *server = ERR_PTR(-EACCES); - struct nfs_parsed_mount_data *args = mount_info->parsed; rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; unsigned int authlist_len = ARRAY_SIZE(authlist); - status = nfs_request_mount(args, mount_info->mntfh, authlist, - &authlist_len); + status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len); if (status) return ERR_PTR(status); @@ -1840,13 +825,13 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf * Was a sec= authflavor specified in the options? First, verify * whether the server supports it, and then just try to use it if so. */ - if (args->auth_info.flavor_len > 0) { - status = nfs_verify_authflavors(args, authlist, authlist_len); + if (ctx->auth_info.flavor_len > 0) { + status = nfs_verify_authflavors(ctx, authlist, authlist_len); dfprintk(MOUNT, "NFS: using auth flavor %u\n", - args->selected_flavor); + ctx->selected_flavor); if (status) return ERR_PTR(status); - return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + return ctx->nfs_mod->rpc_ops->create_server(fc); } /* @@ -1872,8 +857,8 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Fallthrough */ } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); - args->selected_flavor = flavor; - server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + ctx->selected_flavor = flavor; + server = ctx->nfs_mod->rpc_ops->create_server(fc); if (!IS_ERR(server)) return server; } @@ -1888,348 +873,23 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Last chance! Try AUTH_UNIX */ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); - args->selected_flavor = RPC_AUTH_UNIX; - return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + ctx->selected_flavor = RPC_AUTH_UNIX; + return ctx->nfs_mod->rpc_ops->create_server(fc); } -struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +int nfs_try_get_tree(struct fs_context *fc) { - struct nfs_server *server; + struct nfs_fs_context *ctx = nfs_fc2context(fc); - if (mount_info->parsed->need_mount) - server = nfs_try_mount_request(mount_info, nfs_mod); + if (ctx->need_mount) + ctx->server = nfs_try_mount_request(fc); else - server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); - - if (IS_ERR(server)) - return ERR_CAST(server); - - return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); -} -EXPORT_SYMBOL_GPL(nfs_try_mount); - -/* - * Split "dev_name" into "hostname:export_path". - * - * The leftmost colon demarks the split between the server's hostname - * and the export path. If the hostname starts with a left square - * bracket, then it may contain colons. - * - * Note: caller frees hostname and export path, even on error. - */ -static int nfs_parse_devname(const char *dev_name, - char **hostname, size_t maxnamlen, - char **export_path, size_t maxpathlen) -{ - size_t len; - char *end; - - if (unlikely(!dev_name || !*dev_name)) { - dfprintk(MOUNT, "NFS: device name not specified\n"); - return -EINVAL; - } - - /* Is the host name protected with square brakcets? */ - if (*dev_name == '[') { - end = strchr(++dev_name, ']'); - if (end == NULL || end[1] != ':') - goto out_bad_devname; - - len = end - dev_name; - end++; - } else { - char *comma; - - end = strchr(dev_name, ':'); - if (end == NULL) - goto out_bad_devname; - len = end - dev_name; - - /* kill possible hostname list: not supported */ - comma = strchr(dev_name, ','); - if (comma != NULL && comma < end) - len = comma - dev_name; - } - - if (len > maxnamlen) - goto out_hostname; - - /* N.B. caller will free nfs_server.hostname in all cases */ - *hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (*hostname == NULL) - goto out_nomem; - len = strlen(++end); - if (len > maxpathlen) - goto out_path; - *export_path = kstrndup(end, len, GFP_KERNEL); - if (!*export_path) - goto out_nomem; - - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); - return 0; - -out_bad_devname: - dfprintk(MOUNT, "NFS: device name not in host:path format\n"); - return -EINVAL; + ctx->server = ctx->nfs_mod->rpc_ops->create_server(fc); -out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); - return -ENOMEM; - -out_hostname: - dfprintk(MOUNT, "NFS: server hostname too long\n"); - return -ENAMETOOLONG; - -out_path: - dfprintk(MOUNT, "NFS: export pathname too long\n"); - return -ENAMETOOLONG; + return nfs_get_tree_common(fc); } +EXPORT_SYMBOL_GPL(nfs_try_get_tree); -/* - * Validate the NFS2/NFS3 mount data - * - fills in the mount root filehandle - * - * For option strings, user space handles the following behaviors: - * - * + DNS: mapping server host name to IP address ("addr=" option) - * - * + failure mode: how to behave if a mount request can't be handled - * immediately ("fg/bg" option) - * - * + retry: how often to retry a mount request ("retry=" option) - * - * + breaking back: trying proto=udp after proto=tcp, v2 after v3, - * mountproto=tcp after mountproto=udp, and so on - */ -static int nfs23_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - struct nfs_mount_data *data = (struct nfs_mount_data *)options; - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; - - if (data == NULL) - goto out_no_data; - - args->version = NFS_DEFAULT_VERSION; - switch (data->version) { - case 1: - data->namlen = 0; /* fall through */ - case 2: - data->bsize = 0; /* fall through */ - case 3: - if (data->flags & NFS_MOUNT_VER3) - goto out_no_v3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - /* Turn off security negotiation */ - extra_flags |= NFS_MOUNT_SECFLAVOUR; - /* fall through */ - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) - goto out_no_sec; - /* fall through */ - case 5: - memset(data->context, 0, sizeof(data->context)); - /* fall through */ - case 6: - if (data->flags & NFS_MOUNT_VER3) { - if (data->root.size > NFS3_FHSIZE || data->root.size == 0) - goto out_invalid_fh; - mntfh->size = data->root.size; - args->version = 3; - } else { - mntfh->size = NFS2_FHSIZE; - args->version = 2; - } - - - memcpy(mntfh->data, data->root.data, mntfh->size); - if (mntfh->size < sizeof(mntfh->data)) - memset(mntfh->data + mntfh->size, 0, - sizeof(mntfh->data) - mntfh->size); - - /* - * Translate to nfs_parsed_mount_data, which nfs_fill_super - * can deal with. - */ - args->flags = data->flags & NFS_MOUNT_FLAGMASK; - args->flags |= extra_flags; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->need_mount = false; - - memcpy(sap, &data->addr, sizeof(data->addr)); - args->nfs_server.addrlen = sizeof(data->addr); - args->nfs_server.port = ntohs(data->addr.sin_port); - if (sap->sa_family != AF_INET || - !nfs_verify_server_address(sap)) - goto out_no_address; - - if (!(data->flags & NFS_MOUNT_TCP)) - args->nfs_server.protocol = XPRT_TRANSPORT_UDP; - /* N.B. caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); - args->namlen = data->namlen; - args->bsize = data->bsize; - - if (data->flags & NFS_MOUNT_SECFLAVOUR) - args->selected_flavor = data->pseudoflavor; - else - args->selected_flavor = RPC_AUTH_UNIX; - if (!args->nfs_server.hostname) - goto out_nomem; - - if (!(data->flags & NFS_MOUNT_NONLM)) - args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| - NFS_MOUNT_LOCAL_FCNTL); - else - args->flags |= (NFS_MOUNT_LOCAL_FLOCK| - NFS_MOUNT_LOCAL_FCNTL); - /* - * The legacy version 6 binary mount data from userspace has a - * field used only to transport selinux information into the - * the kernel. To continue to support that functionality we - * have a touch of selinux knowledge here in the NFS code. The - * userspace code converted context=blah to just blah so we are - * converting back to the full string selinux understands. - */ - if (data->context[0]){ -#ifdef CONFIG_SECURITY_SELINUX - int rc; - data->context[NFS_MAX_CONTEXT_LEN] = '\0'; - rc = security_add_mnt_opt("context", data->context, - strlen(data->context), &args->lsm_opts); - if (rc) - return rc; -#else - return -EINVAL; -#endif - } - - break; - default: - return NFS_TEXT_DATA; - } - - return 0; - -out_no_data: - dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); - return -EINVAL; - -out_no_v3: - dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", - data->version); - return -EINVAL; - -out_no_sec: - dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); - return -EINVAL; - -out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); - return -ENOMEM; - -out_no_address: - dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); - return -EINVAL; - -out_invalid_fh: - dfprintk(MOUNT, "NFS: invalid root filehandle\n"); - return -EINVAL; -} - -#if IS_ENABLED(CONFIG_NFS_V4) -static int nfs_validate_mount_data(struct file_system_type *fs_type, - void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - if (fs_type == &nfs_fs_type) - return nfs23_validate_mount_data(options, args, mntfh, dev_name); - return nfs4_validate_mount_data(options, args, dev_name); -} -#else -static int nfs_validate_mount_data(struct file_system_type *fs_type, - void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - return nfs23_validate_mount_data(options, args, mntfh, dev_name); -} -#endif - -static int nfs_validate_text_mount_data(void *options, - struct nfs_parsed_mount_data *args, - const char *dev_name) -{ - int port = 0; - int max_namelen = PAGE_SIZE; - int max_pathlen = NFS_MAXPATHLEN; - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - - if (nfs_parse_mount_options((char *)options, args) == 0) - return -EINVAL; - - if (!nfs_verify_server_address(sap)) - goto out_no_address; - - if (args->version == 4) { -#if IS_ENABLED(CONFIG_NFS_V4) - if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) - port = NFS_RDMA_PORT; - else - port = NFS_PORT; - max_namelen = NFS4_MAXNAMLEN; - max_pathlen = NFS4_MAXPATHLEN; - nfs_validate_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) - goto out_invalid_transport_udp; - nfs4_validate_mount_flags(args); -#else - goto out_v4_not_compiled; -#endif /* CONFIG_NFS_V4 */ - } else { - nfs_set_mount_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) - port = NFS_RDMA_PORT; - } - - nfs_set_port(sap, &args->nfs_server.port, port); - - return nfs_parse_devname(dev_name, - &args->nfs_server.hostname, - max_namelen, - &args->nfs_server.export_path, - max_pathlen); - -#if !IS_ENABLED(CONFIG_NFS_V4) -out_v4_not_compiled: - dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); - return -EPROTONOSUPPORT; -#else -out_invalid_transport_udp: - dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); - return -EINVAL; -#endif /* !CONFIG_NFS_V4 */ - -out_no_address: - dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); - return -EINVAL; -} #define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | NFS_MOUNT_SECURE \ @@ -2246,39 +906,35 @@ out_no_address: static int nfs_compare_remount_data(struct nfs_server *nfss, - struct nfs_parsed_mount_data *data) -{ - if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK || - data->rsize != nfss->rsize || - data->wsize != nfss->wsize || - data->version != nfss->nfs_client->rpc_ops->version || - data->minorversion != nfss->nfs_client->cl_minorversion || - data->retrans != nfss->client->cl_timeout->to_retries || - !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) || - data->acregmin != nfss->acregmin / HZ || - data->acregmax != nfss->acregmax / HZ || - data->acdirmin != nfss->acdirmin / HZ || - data->acdirmax != nfss->acdirmax / HZ || - data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || - (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) || - data->nfs_server.port != nfss->port || - data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || - !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address, + struct nfs_fs_context *ctx) +{ + if ((ctx->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK || + ctx->rsize != nfss->rsize || + ctx->wsize != nfss->wsize || + ctx->version != nfss->nfs_client->rpc_ops->version || + ctx->minorversion != nfss->nfs_client->cl_minorversion || + ctx->retrans != nfss->client->cl_timeout->to_retries || + !nfs_auth_info_match(&ctx->auth_info, nfss->client->cl_auth->au_flavor) || + ctx->acregmin != nfss->acregmin / HZ || + ctx->acregmax != nfss->acregmax / HZ || + ctx->acdirmin != nfss->acdirmin / HZ || + ctx->acdirmax != nfss->acdirmax / HZ || + ctx->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || + (ctx->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) || + ctx->nfs_server.port != nfss->port || + ctx->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || + !rpc_cmp_addr((struct sockaddr *)&ctx->nfs_server.address, (struct sockaddr *)&nfss->nfs_client->cl_addr)) return -EINVAL; return 0; } -int -nfs_remount(struct super_block *sb, int *flags, char *raw_data) +int nfs_reconfigure(struct fs_context *fc) { - int error; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct super_block *sb = fc->root->d_sb; struct nfs_server *nfss = sb->s_fs_info; - struct nfs_parsed_mount_data *data; - struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; - struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; - u32 nfsvers = nfss->nfs_client->rpc_ops->version; sync_filesystem(sb); @@ -2288,92 +944,38 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) * ones were explicitly specified. Fall back to legacy behavior and * just return success. */ - if ((nfsvers == 4 && (!options4 || options4->version == 1)) || - (nfsvers <= 3 && (!options || (options->version >= 1 && - options->version <= 6)))) + if (ctx->skip_reconfig_option_check) return 0; - data = nfs_alloc_parsed_mount_data(); - if (data == NULL) - return -ENOMEM; - - /* fill out struct with values from existing mount */ - data->flags = nfss->flags; - data->rsize = nfss->rsize; - data->wsize = nfss->wsize; - data->retrans = nfss->client->cl_timeout->to_retries; - data->selected_flavor = nfss->client->cl_auth->au_flavor; - data->acregmin = nfss->acregmin / HZ; - data->acregmax = nfss->acregmax / HZ; - data->acdirmin = nfss->acdirmin / HZ; - data->acdirmax = nfss->acdirmax / HZ; - data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; - data->nfs_server.port = nfss->port; - data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; - data->version = nfsvers; - data->minorversion = nfss->nfs_client->cl_minorversion; - data->net = current->nsproxy->net_ns; - memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, - data->nfs_server.addrlen); - - /* overwrite those values with any that were specified */ - error = -EINVAL; - if (!nfs_parse_mount_options((char *)options, data)) - goto out; - /* * noac is a special case. It implies -o sync, but that's not - * necessarily reflected in the mtab options. do_remount_sb + * necessarily reflected in the mtab options. reconfigure_super * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the * remount options, so we have to explicitly reset it. */ - if (data->flags & NFS_MOUNT_NOAC) - *flags |= SB_SYNCHRONOUS; + if (ctx->flags & NFS_MOUNT_NOAC) { + fc->sb_flags |= SB_SYNCHRONOUS; + fc->sb_flags_mask |= SB_SYNCHRONOUS; + } /* compare new mount options with old ones */ - error = nfs_compare_remount_data(nfss, data); - if (!error) - error = security_sb_remount(sb, data->lsm_opts); -out: - nfs_free_parsed_mount_data(data); - return error; -} -EXPORT_SYMBOL_GPL(nfs_remount); - -/* - * Initialise the common bits of the superblock - */ -static void nfs_initialise_sb(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), - "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - - nfs_super_set_maxbytes(sb, server->maxfilesize); + return nfs_compare_remount_data(nfss, ctx); } +EXPORT_SYMBOL_GPL(nfs_reconfigure); /* - * Finish setting up an NFS2/3 superblock + * Finish setting up an NFS superblock */ -void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) +static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) { - struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); sb->s_blocksize_bits = 0; sb->s_blocksize = 0; sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr; sb->s_op = server->nfs_client->cl_nfs_mod->sops; - if (data && data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + if (ctx && ctx->bsize) + sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits); if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do @@ -2393,53 +995,27 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_time_max = S64_MAX; } - nfs_initialise_sb(sb); -} -EXPORT_SYMBOL_GPL(nfs_fill_super); - -/* - * Finish setting up a cloned NFS2/3/4 superblock - */ -static void nfs_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - const struct super_block *old_sb = mount_info->cloned->sb; - struct nfs_server *server = NFS_SB(sb); - - sb->s_blocksize_bits = old_sb->s_blocksize_bits; - sb->s_blocksize = old_sb->s_blocksize; - sb->s_maxbytes = old_sb->s_maxbytes; - sb->s_xattr = old_sb->s_xattr; - sb->s_op = old_sb->s_op; - sb->s_export_op = old_sb->s_export_op; + sb->s_magic = NFS_SUPER_MAGIC; - if (server->nfs_client->rpc_ops->version != 2) { - /* The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. - */ - sb->s_flags |= SB_POSIXACL; - sb->s_time_gran = 1; - } else - sb->s_time_gran = 1000; + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), + "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - if (server->nfs_client->rpc_ops->version != 4) { - sb->s_time_min = 0; - sb->s_time_max = U32_MAX; - } else { - sb->s_time_min = S64_MIN; - sb->s_time_max = S64_MAX; - } + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); - nfs_initialise_sb(sb); + nfs_super_set_maxbytes(sb, server->maxfilesize); } -static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) +static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, + const struct fs_context *fc) { const struct nfs_server *a = s->s_fs_info; const struct rpc_clnt *clnt_a = a->client; const struct rpc_clnt *clnt_b = b->client; - if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) + if ((s->s_flags & NFS_SB_MASK) != (fc->sb_flags & NFS_SB_MASK)) goto Ebusy; if (a->nfs_client != b->nfs_client) goto Ebusy; @@ -2464,19 +1040,11 @@ Ebusy: return 0; } -struct nfs_sb_mountdata { - struct nfs_server *server; - int mntflags; -}; - -static int nfs_set_super(struct super_block *s, void *data) +static int nfs_set_super(struct super_block *s, struct fs_context *fc) { - struct nfs_sb_mountdata *sb_mntdata = data; - struct nfs_server *server = sb_mntdata->server; + struct nfs_server *server = fc->s_fs_info; int ret; - s->s_flags = sb_mntdata->mntflags; - s->s_fs_info = server; s->s_d_op = server->nfs_client->rpc_ops->dentry_ops; ret = set_anon_super(s, server); if (ret == 0) @@ -2541,11 +1109,9 @@ static int nfs_compare_userns(const struct nfs_server *old, return 1; } -static int nfs_compare_super(struct super_block *sb, void *data) +static int nfs_compare_super(struct super_block *sb, struct fs_context *fc) { - struct nfs_sb_mountdata *sb_mntdata = data; - struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb); - int mntflags = sb_mntdata->mntflags; + struct nfs_server *server = fc->s_fs_info, *old = NFS_SB(sb); if (!nfs_compare_super_address(old, server)) return 0; @@ -2556,13 +1122,12 @@ static int nfs_compare_super(struct super_block *sb, void *data) return 0; if (!nfs_compare_userns(old, server)) return 0; - return nfs_compare_mount_options(sb, server, mntflags); + return nfs_compare_mount_options(sb, server, fc); } #ifdef CONFIG_NFS_FSCACHE static void nfs_get_cache_cookie(struct super_block *sb, - struct nfs_parsed_mount_data *parsed, - struct nfs_clone_mount *cloned) + struct nfs_fs_context *ctx) { struct nfs_server *nfss = NFS_SB(sb); char *uniq = NULL; @@ -2571,80 +1136,36 @@ static void nfs_get_cache_cookie(struct super_block *sb, nfss->fscache_key = NULL; nfss->fscache = NULL; - if (parsed) { - if (!(parsed->options & NFS_OPTION_FSCACHE)) - return; - if (parsed->fscache_uniq) { - uniq = parsed->fscache_uniq; - ulen = strlen(parsed->fscache_uniq); - } - } else if (cloned) { - struct nfs_server *mnt_s = NFS_SB(cloned->sb); + if (!ctx) + return; + + if (ctx->clone_data.sb) { + struct nfs_server *mnt_s = NFS_SB(ctx->clone_data.sb); if (!(mnt_s->options & NFS_OPTION_FSCACHE)) return; if (mnt_s->fscache_key) { uniq = mnt_s->fscache_key->key.uniquifier; ulen = mnt_s->fscache_key->key.uniq_len; } - } else + } else { + if (!(ctx->options & NFS_OPTION_FSCACHE)) + return; + if (ctx->fscache_uniq) { + uniq = ctx->fscache_uniq; + ulen = strlen(ctx->fscache_uniq); + } return; + } nfs_fscache_get_super_cookie(sb, uniq, ulen); } #else static void nfs_get_cache_cookie(struct super_block *sb, - struct nfs_parsed_mount_data *parsed, - struct nfs_clone_mount *cloned) + struct nfs_fs_context *ctx) { } #endif -int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) -{ - int error; - unsigned long kflags = 0, kflags_out = 0; - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) - kflags |= SECURITY_LSM_NATIVE_LABELS; - - error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts, - kflags, &kflags_out); - if (error) - goto err; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && - !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) - NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; -err: - return error; -} -EXPORT_SYMBOL_GPL(nfs_set_sb_security); - -int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) -{ - int error; - unsigned long kflags = 0, kflags_out = 0; - - /* clone any lsm security options from the parent to the new sb */ - if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) - return -ESTALE; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) - kflags |= SECURITY_LSM_NATIVE_LABELS; - - error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags, - &kflags_out); - if (error) - return error; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && - !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) - NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; - return 0; -} -EXPORT_SYMBOL_GPL(nfs_clone_sb_security); - static void nfs_set_readahead(struct backing_dev_info *bdi, unsigned long iomax_pages) { @@ -2652,35 +1173,40 @@ static void nfs_set_readahead(struct backing_dev_info *bdi, bdi->io_pages = iomax_pages; } -struct dentry *nfs_fs_mount_common(struct nfs_server *server, - int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +int nfs_get_tree_common(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct super_block *s; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - int (*compare_super)(struct super_block *, void *) = nfs_compare_super; - struct nfs_sb_mountdata sb_mntdata = { - .mntflags = flags, - .server = server, - }; + int (*compare_super)(struct super_block *, struct fs_context *) = nfs_compare_super; + struct nfs_server *server = ctx->server; + unsigned long kflags = 0, kflags_out = 0; int error; + ctx->server = NULL; + if (IS_ERR(server)) + return PTR_ERR(server); + if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= SB_SYNCHRONOUS; + fc->sb_flags |= SB_SYNCHRONOUS; - if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) - if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS) - sb_mntdata.mntflags |= SB_SYNCHRONOUS; + if (ctx->clone_data.sb) + if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS) + fc->sb_flags |= SB_SYNCHRONOUS; + + if (server->caps & NFS_CAP_SECURITY_LABEL) + fc->lsm_flags |= SECURITY_LSM_NATIVE_LABELS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); + fc->s_fs_info = server; + s = sget_fc(fc, compare_super, nfs_set_super); + fc->s_fs_info = NULL; if (IS_ERR(s)) { - mntroot = ERR_CAST(s); + error = PTR_ERR(s); + nfs_errorf(fc, "NFS: Couldn't get superblock"); goto out_err_nosb; } @@ -2690,88 +1216,66 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, } else { error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); - if (error) { - mntroot = ERR_PTR(error); + if (error) goto error_splat_super; - } nfs_set_readahead(s->s_bdi, server->rpages); server->super = s; } if (!s->s_root) { + unsigned bsize = ctx->clone_data.inherited_bsize; /* initial superblock/root creation */ - mount_info->fill_super(s, mount_info); - nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); - if (!(server->flags & NFS_MOUNT_UNSHARED)) - s->s_iflags |= SB_I_MULTIROOT; + nfs_fill_super(s, ctx); + if (bsize) { + s->s_blocksize_bits = bsize; + s->s_blocksize = 1U << bsize; + } + nfs_get_cache_cookie(s, ctx); } - mntroot = nfs_get_root(s, mount_info->mntfh, dev_name); - if (IS_ERR(mntroot)) + error = nfs_get_root(s, fc); + if (error < 0) { + nfs_errorf(fc, "NFS: Couldn't get root dentry"); goto error_splat_super; + } - error = mount_info->set_security(s, mntroot, mount_info); + if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) + kflags |= SECURITY_LSM_NATIVE_LABELS; + if (ctx->clone_data.sb) { + if (d_inode(fc->root)->i_fop != &nfs_dir_operations) { + error = -ESTALE; + goto error_splat_root; + } + /* clone any lsm security options from the parent to the new sb */ + error = security_sb_clone_mnt_opts(ctx->clone_data.sb, s, kflags, + &kflags_out); + } else { + error = security_sb_set_mnt_opts(s, fc->security, + kflags, &kflags_out); + } if (error) goto error_splat_root; + if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && + !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) + NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; s->s_flags |= SB_ACTIVE; + error = 0; out: - return mntroot; + return error; out_err_nosb: nfs_free_server(server); goto out; error_splat_root: - dput(mntroot); - mntroot = ERR_PTR(error); + dput(fc->root); + fc->root = NULL; error_splat_super: deactivate_locked_super(s); goto out; } -EXPORT_SYMBOL_GPL(nfs_fs_mount_common); - -struct dentry *nfs_fs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_fill_super, - .set_security = nfs_set_sb_security, - }; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - struct nfs_subversion *nfs_mod; - int error; - - mount_info.parsed = nfs_alloc_parsed_mount_data(); - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.parsed == NULL || mount_info.mntfh == NULL) - goto out; - - /* Validate the mount data */ - error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name); - if (error == NFS_TEXT_DATA) - error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name); - if (error < 0) { - mntroot = ERR_PTR(error); - goto out; - } - - nfs_mod = get_nfs_version(mount_info.parsed->version); - if (IS_ERR(nfs_mod)) { - mntroot = ERR_CAST(nfs_mod); - goto out; - } - - mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod); - - put_nfs_version(nfs_mod); -out: - nfs_free_parsed_mount_data(mount_info.parsed); - nfs_free_fhandle(mount_info.mntfh); - return mntroot; -} -EXPORT_SYMBOL_GPL(nfs_fs_mount); /* * Destroy an NFS2/3 superblock @@ -2790,150 +1294,8 @@ void nfs_kill_super(struct super_block *s) } EXPORT_SYMBOL_GPL(nfs_kill_super); -/* - * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_clone_mount *data = raw_data; - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = data, - }; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; - - dprintk("--> nfs_xdev_mount()\n"); - - mount_info.mntfh = mount_info.cloned->fh; - - /* create a new volume representation */ - server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - - if (IS_ERR(server)) - mntroot = ERR_CAST(server); - else - mntroot = nfs_fs_mount_common(server, flags, - dev_name, &mount_info, nfs_mod); - - dprintk("<-- nfs_xdev_mount() = %ld\n", - IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L); - return mntroot; -} - #if IS_ENABLED(CONFIG_NFS_V4) -static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) -{ - args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| - NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); -} - -/* - * Validate NFSv4 mount options - */ -static int nfs4_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, - const char *dev_name) -{ - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; - char *c; - - if (data == NULL) - goto out_no_data; - - args->version = 4; - - switch (data->version) { - case 1: - if (data->host_addrlen > sizeof(args->nfs_server.address)) - goto out_no_address; - if (data->host_addrlen == 0) - goto out_no_address; - args->nfs_server.addrlen = data->host_addrlen; - if (copy_from_user(sap, data->host_addr, data->host_addrlen)) - return -EFAULT; - if (!nfs_verify_server_address(sap)) - goto out_no_address; - args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); - - if (data->auth_flavourlen) { - rpc_authflavor_t pseudoflavor; - if (data->auth_flavourlen > 1) - goto out_inval_auth; - if (copy_from_user(&pseudoflavor, - data->auth_flavours, - sizeof(pseudoflavor))) - return -EFAULT; - args->selected_flavor = pseudoflavor; - } else - args->selected_flavor = RPC_AUTH_UNIX; - - c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); - if (IS_ERR(c)) - return PTR_ERR(c); - args->nfs_server.hostname = c; - - c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); - if (IS_ERR(c)) - return PTR_ERR(c); - args->nfs_server.export_path = c; - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); - - c = strndup_user(data->client_addr.data, 16); - if (IS_ERR(c)) - return PTR_ERR(c); - args->client_address = c; - - /* - * Translate to nfs_parsed_mount_data, which nfs4_fill_super - * can deal with. - */ - - args->flags = data->flags & NFS4_MOUNT_FLAGMASK; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->nfs_server.protocol = data->proto; - nfs_validate_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) - goto out_invalid_transport_udp; - - break; - default: - return NFS_TEXT_DATA; - } - - return 0; - -out_no_data: - dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); - return -EINVAL; - -out_inval_auth: - dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", - data->auth_flavourlen); - return -EINVAL; - -out_no_address: - dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); - return -EINVAL; - -out_invalid_transport_udp: - dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); - return -EINVAL; -} - /* * NFS v4 module parameters need to stay in the * NFS client for backwards compatibility diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 52cab65f91cf..c478b772cc49 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -243,13 +243,24 @@ out: /* A writeback failed: mark the page as bad, and invalidate the page cache */ static void nfs_set_pageerror(struct address_space *mapping) { + struct inode *inode = mapping->host; + nfs_zap_mapping(mapping->host, mapping); + /* Force file size revalidation */ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED | + NFS_INO_REVAL_PAGECACHE | + NFS_INO_INVALID_SIZE; + spin_unlock(&inode->i_lock); } static void nfs_mapping_set_error(struct page *page, int error) { + struct address_space *mapping = page_file_mapping(page); + SetPageError(page); - mapping_set_error(page_file_mapping(page), error); + mapping_set_error(mapping, error); + nfs_set_pageerror(mapping); } /* @@ -592,7 +603,7 @@ release_request: static void nfs_write_error(struct nfs_page *req, int error) { - nfs_set_pageerror(page_file_mapping(req->wb_page)); + trace_nfs_write_error(req, error); nfs_mapping_set_error(req->wb_page, error); nfs_inode_remove_request(req); nfs_end_page_writeback(req); @@ -998,7 +1009,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { - nfs_set_pageerror(page_file_mapping(req->wb_page)); + trace_nfs_comp_error(req, hdr->error); nfs_mapping_set_error(req->wb_page, hdr->error); goto remove_req; } @@ -1403,8 +1414,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, task_setup_data->priority = priority; rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); - trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes, - hdr->args.stable); + trace_nfs_initiate_write(hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1568,8 +1578,7 @@ static int nfs_writeback_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); - trace_nfs_writeback_done(inode, task->tk_status, - hdr->args.offset, hdr->res.verf); + trace_nfs_writeback_done(task, hdr); if (hdr->res.verf->committed < hdr->args.stable && task->tk_status >= 0) { @@ -1649,6 +1658,8 @@ static void nfs_writeback_result(struct rpc_task *task, */ argp->stable = NFS_FILE_SYNC; } + resp->count = 0; + resp->verf->committed = 0; rpc_restart_call_prepare(task); } } @@ -1824,11 +1835,12 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); - trace_nfs_commit_done(data); + trace_nfs_commit_done(task, data); } static void nfs_commit_release_pages(struct nfs_commit_data *data) { + const struct nfs_writeverf *verf = data->res.verf; struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; @@ -1847,6 +1859,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) (long long)req_offset(req)); if (status < 0) { if (req->wb_page) { + trace_nfs_commit_error(req, status); nfs_mapping_set_error(req->wb_page, status); nfs_inode_remove_request(req); } @@ -1856,7 +1869,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { + if (verf->committed > NFS_UNSTABLE && + !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) { /* We have a match */ if (req->wb_page) nfs_inode_remove_request(req); diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f2f81561ebb6..f368f3215f88 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -134,6 +134,16 @@ config NFSD_FLEXFILELAYOUT If unsure, say N. +config NFSD_V4_2_INTER_SSC + bool "NFSv4.2 inter server to server COPY" + depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 + help + This option enables support for NFSv4.2 inter server to + server copy where the destination server calls the NFSv4.2 + client to read the data to copy from the source server. + + If unsure, say N. + config NFSD_V4_SECURITY_LABEL bool "Provide Security Label support for NFSv4 server" depends on NFSD_V4 && SECURITY diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 32a9bf22ac08..22e77ede9f14 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -27,7 +27,6 @@ #define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) -#define NFSD_FILE_LRU_RESCAN (0) #define NFSD_FILE_SHUTDOWN (1) #define NFSD_FILE_LRU_THRESHOLD (4096UL) #define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) @@ -44,6 +43,17 @@ struct nfsd_fcache_bucket { static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); +struct nfsd_fcache_disposal { + struct list_head list; + struct work_struct work; + struct net *net; + spinlock_t lock; + struct list_head freeme; + struct rcu_head rcu; +}; + +static struct workqueue_struct *nfsd_filecache_wq __read_mostly; + static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; static struct nfsd_fcache_bucket *nfsd_file_hashtbl; @@ -52,32 +62,21 @@ static long nfsd_file_lru_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; +static DEFINE_SPINLOCK(laundrette_lock); +static LIST_HEAD(laundrettes); -enum nfsd_file_laundrette_ctl { - NFSD_FILE_LAUNDRETTE_NOFLUSH = 0, - NFSD_FILE_LAUNDRETTE_MAY_FLUSH -}; +static void nfsd_file_gc(void); static void -nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl) +nfsd_file_schedule_laundrette(void) { long count = atomic_long_read(&nfsd_filecache_count); if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) return; - /* Be more aggressive about scanning if over the threshold */ - if (count > NFSD_FILE_LRU_THRESHOLD) - mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0); - else - schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); - - if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH) - return; - - /* ...and don't delay flushing if we're out of control */ - if (count >= NFSD_FILE_LRU_LIMIT) - flush_delayed_work(&nfsd_filecache_laundrette); + queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + NFSD_LAUNDRETTE_DELAY); } static void @@ -101,7 +100,7 @@ nfsd_file_mark_free(struct fsnotify_mark *mark) static struct nfsd_file_mark * nfsd_file_mark_get(struct nfsd_file_mark *nfm) { - if (!atomic_inc_not_zero(&nfm->nfm_ref)) + if (!refcount_inc_not_zero(&nfm->nfm_ref)) return NULL; return nfm; } @@ -109,8 +108,7 @@ nfsd_file_mark_get(struct nfsd_file_mark *nfm) static void nfsd_file_mark_put(struct nfsd_file_mark *nfm) { - if (atomic_dec_and_test(&nfm->nfm_ref)) { - + if (refcount_dec_and_test(&nfm->nfm_ref)) { fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); fsnotify_put_mark(&nfm->nfm_mark); } @@ -133,9 +131,13 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) struct nfsd_file_mark, nfm_mark)); mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); - fsnotify_put_mark(mark); - if (likely(nfm)) + if (nfm) { + fsnotify_put_mark(mark); break; + } + /* Avoid soft lockup race with nfsd_file_mark_put() */ + fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); + fsnotify_put_mark(mark); } else mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); @@ -145,7 +147,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) return NULL; fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; - atomic_set(&new->nfm_ref, 1); + refcount_set(&new->nfm_ref, 1); err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); @@ -183,7 +185,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, nf->nf_flags = 0; nf->nf_inode = inode; nf->nf_hashval = hashval; - atomic_set(&nf->nf_ref, 1); + refcount_set(&nf->nf_ref, 1); nf->nf_may = may & NFSD_FILE_MAY_MASK; if (may & NFSD_MAY_NOT_BREAK_LEASE) { if (may & NFSD_MAY_WRITE) @@ -192,6 +194,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); } nf->nf_mark = NULL; + init_rwsem(&nf->nf_rwsem); trace_nfsd_file_alloc(nf); } return nf; @@ -238,13 +241,6 @@ nfsd_file_check_write_error(struct nfsd_file *nf) return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); } -static bool -nfsd_file_in_use(struct nfsd_file *nf) -{ - return nfsd_file_check_writeback(nf) || - nfsd_file_check_write_error(nf); -} - static void nfsd_file_do_unhash(struct nfsd_file *nf) { @@ -256,8 +252,6 @@ nfsd_file_do_unhash(struct nfsd_file *nf) nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; hlist_del_rcu(&nf->nf_node); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, &nf->nf_lru); atomic_long_dec(&nfsd_filecache_count); } @@ -266,6 +260,8 @@ nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_do_unhash(nf); + if (!list_empty(&nf->nf_lru)) + list_lru_del(&nfsd_file_lru, &nf->nf_lru); return true; } return false; @@ -283,42 +279,48 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp if (!nfsd_file_unhash(nf)) return false; /* keep final reference for nfsd_file_lru_dispose */ - if (atomic_add_unless(&nf->nf_ref, -1, 1)) + if (refcount_dec_not_one(&nf->nf_ref)) return true; list_add(&nf->nf_lru, dispose); return true; } -static int +static void nfsd_file_put_noref(struct nfsd_file *nf) { - int count; trace_nfsd_file_put(nf); - count = atomic_dec_return(&nf->nf_ref); - if (!count) { + if (refcount_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); nfsd_file_free(nf); } - return count; } void nfsd_file_put(struct nfsd_file *nf) { - bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - bool unused = !nfsd_file_in_use(nf); + bool is_hashed; set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused) - nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH); + if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { + nfsd_file_put_noref(nf); + return; + } + + filemap_flush(nf->nf_file->f_mapping); + is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; + nfsd_file_put_noref(nf); + if (is_hashed) + nfsd_file_schedule_laundrette(); + if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) + nfsd_file_gc(); } struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { - if (likely(atomic_inc_not_zero(&nf->nf_ref))) + if (likely(refcount_inc_not_zero(&nf->nf_ref))) return nf; return NULL; } @@ -344,7 +346,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del(&nf->nf_lru); - if (!atomic_dec_and_test(&nf->nf_ref)) + if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) flush = true; @@ -353,6 +355,58 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) flush_delayed_fput(); } +static void +nfsd_file_list_remove_disposal(struct list_head *dst, + struct nfsd_fcache_disposal *l) +{ + spin_lock(&l->lock); + list_splice_init(&l->freeme, dst); + spin_unlock(&l->lock); +} + +static void +nfsd_file_list_add_disposal(struct list_head *files, struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net == net) { + spin_lock(&l->lock); + list_splice_tail_init(files, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); + break; + } + } + rcu_read_unlock(); +} + +static void +nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src, + struct net *net) +{ + struct nfsd_file *nf, *tmp; + + list_for_each_entry_safe(nf, tmp, src, nf_lru) { + if (nf->nf_net == net) + list_move_tail(&nf->nf_lru, dst); + } +} + +static void +nfsd_file_dispose_list_delayed(struct list_head *dispose) +{ + LIST_HEAD(list); + struct nfsd_file *nf; + + while(!list_empty(dispose)) { + nf = list_first_entry(dispose, struct nfsd_file, nf_lru); + nfsd_file_list_add_pernet(&list, dispose, nf->nf_net); + nfsd_file_list_add_disposal(&list, nf->nf_net); + } +} + /* * Note this can deadlock with nfsd_file_cache_purge. */ @@ -375,7 +429,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * counter. Here we check the counter and then test and clear the flag. * That order is deliberate to ensure that we can do this locklessly. */ - if (atomic_read(&nf->nf_ref) > 1) + if (refcount_read(&nf->nf_ref) > 1) goto out_skip; /* @@ -386,31 +440,51 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, goto out_skip; if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) - goto out_rescan; + goto out_skip; if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) goto out_skip; list_lru_isolate_move(lru, &nf->nf_lru, head); return LRU_REMOVED; -out_rescan: - set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags); out_skip: return LRU_SKIP; } -static void -nfsd_file_lru_dispose(struct list_head *head) +static unsigned long +nfsd_file_lru_walk_list(struct shrink_control *sc) { - while(!list_empty(head)) { - struct nfsd_file *nf = list_first_entry(head, - struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + LIST_HEAD(head); + struct nfsd_file *nf; + unsigned long ret; + + if (sc) + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &head); + else + ret = list_lru_walk(&nfsd_file_lru, + nfsd_file_lru_cb, + &head, LONG_MAX); + list_for_each_entry(nf, &head, nf_lru) { spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); nfsd_file_do_unhash(nf); spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_put_noref(nf); } + nfsd_file_dispose_list_delayed(&head); + return ret; +} + +static void +nfsd_file_gc(void) +{ + nfsd_file_lru_walk_list(NULL); +} + +static void +nfsd_file_gc_worker(struct work_struct *work) +{ + nfsd_file_gc(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -422,12 +496,7 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - LIST_HEAD(head); - unsigned long ret; - - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head); - nfsd_file_lru_dispose(&head); - return ret; + return nfsd_file_lru_walk_list(sc); } static struct shrinker nfsd_file_shrinker = { @@ -489,7 +558,7 @@ nfsd_file_close_inode(struct inode *inode) __nfsd_file_close_inode(inode, hashval, &dispose); trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); - nfsd_file_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } /** @@ -505,16 +574,11 @@ static void nfsd_file_delayed_close(struct work_struct *work) { LIST_HEAD(head); + struct nfsd_fcache_disposal *l = container_of(work, + struct nfsd_fcache_disposal, work); - list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX); - - if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags)) - nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH); - - if (!list_empty(&head)) { - nfsd_file_lru_dispose(&head); - flush_delayed_fput(); - } + nfsd_file_list_remove_disposal(&head, l); + nfsd_file_dispose_list(&head); } static int @@ -575,6 +639,10 @@ nfsd_file_cache_init(void) if (nfsd_file_hashtbl) return 0; + nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); + if (!nfsd_filecache_wq) + goto out; + nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, sizeof(*nfsd_file_hashtbl), GFP_KERNEL); if (!nfsd_file_hashtbl) { @@ -628,7 +696,7 @@ nfsd_file_cache_init(void) spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); } - INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close); + INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; out_notifier: @@ -644,6 +712,8 @@ out_err: nfsd_file_mark_slab = NULL; kfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; goto out; } @@ -682,6 +752,88 @@ nfsd_file_cache_purge(struct net *net) } } +static struct nfsd_fcache_disposal * +nfsd_alloc_fcache_disposal(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = kmalloc(sizeof(*l), GFP_KERNEL); + if (!l) + return NULL; + INIT_WORK(&l->work, nfsd_file_delayed_close); + l->net = net; + spin_lock_init(&l->lock); + INIT_LIST_HEAD(&l->freeme); + return l; +} + +static void +nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + rcu_assign_pointer(l->net, NULL); + cancel_work_sync(&l->work); + nfsd_file_dispose_list(&l->freeme); + kfree_rcu(l, rcu); +} + +static void +nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_add_tail_rcu(&l->list, &laundrettes); + spin_unlock(&laundrette_lock); +} + +static void +nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) +{ + spin_lock(&laundrette_lock); + list_del_rcu(&l->list); + spin_unlock(&laundrette_lock); +} + +static int +nfsd_alloc_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + l = nfsd_alloc_fcache_disposal(net); + if (!l) + return -ENOMEM; + nfsd_add_fcache_disposal(l); + return 0; +} + +static void +nfsd_free_fcache_disposal_net(struct net *net) +{ + struct nfsd_fcache_disposal *l; + + rcu_read_lock(); + list_for_each_entry_rcu(l, &laundrettes, list) { + if (l->net != net) + continue; + nfsd_del_fcache_disposal(l); + rcu_read_unlock(); + nfsd_free_fcache_disposal(l); + return; + } + rcu_read_unlock(); +} + +int +nfsd_file_cache_start_net(struct net *net) +{ + return nfsd_alloc_fcache_disposal_net(net); +} + +void +nfsd_file_cache_shutdown_net(struct net *net) +{ + nfsd_file_cache_purge(net); + nfsd_free_fcache_disposal_net(net); +} + void nfsd_file_cache_shutdown(void) { @@ -706,6 +858,8 @@ nfsd_file_cache_shutdown(void) nfsd_file_mark_slab = NULL; kfree(nfsd_file_hashtbl); nfsd_file_hashtbl = NULL; + destroy_workqueue(nfsd_filecache_wq); + nfsd_filecache_wq = NULL; } static bool @@ -789,6 +943,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, *new; struct inode *inode; unsigned int hashval; + bool retry = true; /* FIXME: skip this if fh_dentry is already set? */ status = fh_verify(rqstp, fhp, S_IFREG, @@ -824,6 +979,11 @@ wait_for_construction: /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + if (!retry) { + status = nfserr_jukebox; + goto out; + } + retry = false; nfsd_file_put_noref(nf); goto retry; } @@ -858,7 +1018,7 @@ out: open_file: nf = new; /* Take reference for the hashtable */ - atomic_inc(&nf->nf_ref); + refcount_inc(&nf->nf_ref); __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); list_lru_add(&nfsd_file_lru, &nf->nf_lru); @@ -867,7 +1027,8 @@ open_file: nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, nfsd_file_hashtbl[hashval].nfb_count); spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - atomic_long_inc(&nfsd_filecache_count); + if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) + nfsd_file_gc(); nf->nf_mark = nfsd_file_mark_find_or_create(nf); if (nf->nf_mark) diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 851d9abf54c2..7872df5a0fe3 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -19,7 +19,7 @@ */ struct nfsd_file_mark { struct fsnotify_mark nfm_mark; - atomic_t nfm_ref; + refcount_t nfm_ref; }; /* @@ -43,14 +43,17 @@ struct nfsd_file { unsigned long nf_flags; struct inode *nf_inode; unsigned int nf_hashval; - atomic_t nf_ref; + refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; + struct rw_semaphore nf_rwsem; }; int nfsd_file_cache_init(void); void nfsd_file_cache_purge(struct net *); void nfsd_file_cache_shutdown(void); +int nfsd_file_cache_start_net(struct net *net); +void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 9a4ef815fb8c..2baf32311e00 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -40,7 +40,7 @@ struct nfsd_net { struct lock_manager nfsd4_manager; bool grace_ended; - time_t boot_time; + time64_t boot_time; /* internal mount of the "nfsd" pseudofilesystem: */ struct vfsmount *nfsd_mnt; @@ -92,8 +92,8 @@ struct nfsd_net { bool in_grace; const struct nfsd4_client_tracking_ops *client_tracking_ops; - time_t nfsd4_lease; - time_t nfsd4_grace; + time64_t nfsd4_lease; + time64_t nfsd4_grace; bool somebody_reclaimed; bool track_reclaim_completes; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index cea68d8411ac..288bc76b4574 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -203,7 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp) RETURN_STATUS(nfserr_io); nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, rqstp->rq_vec, nvecs, &cnt, - resp->committed); + resp->committed, resp->verf); resp->count = cnt; RETURN_STATUS(nfserr); } @@ -683,7 +683,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) RETURN_STATUS(nfserr_inval); fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count); + nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count, + resp->verf); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 195ab7a0fc89..aae514d40b64 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -32,14 +32,14 @@ static u32 nfs3_ftypes[] = { * XDR functions for basic NFS types */ static __be32 * -encode_time3(__be32 *p, struct timespec *time) +encode_time3(__be32 *p, struct timespec64 *time) { *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); return p; } static __be32 * -decode_time3(__be32 *p, struct timespec *time) +decode_time3(__be32 *p, struct timespec64 *time) { time->tv_sec = ntohl(*p++); time->tv_nsec = ntohl(*p++); @@ -167,7 +167,6 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { struct user_namespace *userns = nfsd_user_namespace(rqstp); - struct timespec ts; *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); *p++ = htonl((u32) (stat->mode & S_IALLUGO)); *p++ = htonl((u32) stat->nlink); @@ -183,12 +182,9 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, *p++ = htonl((u32) MINOR(stat->rdev)); p = encode_fsid(p, fhp); p = xdr_encode_hyper(p, stat->ino); - ts = timespec64_to_timespec(stat->atime); - p = encode_time3(p, &ts); - ts = timespec64_to_timespec(stat->mtime); - p = encode_time3(p, &ts); - ts = timespec64_to_timespec(stat->ctime); - p = encode_time3(p, &ts); + p = encode_time3(p, &stat->atime); + p = encode_time3(p, &stat->mtime); + p = encode_time3(p, &stat->ctime); return p; } @@ -277,8 +273,8 @@ void fill_pre_wcc(struct svc_fh *fhp) stat.size = inode->i_size; } - fhp->fh_pre_mtime = timespec64_to_timespec(stat.mtime); - fhp->fh_pre_ctime = timespec64_to_timespec(stat.ctime); + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; fhp->fh_pre_size = stat.size; fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); fhp->fh_pre_saved = true; @@ -330,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); if ((args->check_guard = ntohl(*p++)) != 0) { - struct timespec time; + struct timespec64 time; p = decode_time3(p, &time); args->guardtime = time.tv_sec; } @@ -751,17 +747,13 @@ int nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_writeres *resp = rqstp->rq_resp; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - __be32 verf[2]; p = encode_wcc_data(rqstp, p, &resp->fh); if (resp->status == 0) { *p++ = htonl(resp->count); *p++ = htonl(resp->committed); - /* unique identifier, y2038 overflow can be ignored */ - nfsd_copy_boot_verifier(verf, nn); - *p++ = verf[0]; - *p++ = verf[1]; + *p++ = resp->verf[0]; + *p++ = resp->verf[1]; } return xdr_ressize_check(rqstp, p); } @@ -1125,16 +1117,12 @@ int nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) { struct nfsd3_commitres *resp = rqstp->rq_resp; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - __be32 verf[2]; p = encode_wcc_data(rqstp, p, &resp->fh); /* Write verifier */ if (resp->status == 0) { - /* unique identifier, y2038 overflow can be ignored */ - nfsd_copy_boot_verifier(verf, nn); - *p++ = verf[0]; - *p++ = verf[1]; + *p++ = resp->verf[0]; + *p++ = resp->verf[1]; } return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 24534db87e86..c3b11a715082 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -823,7 +823,16 @@ static const struct rpc_program cb_program = { static int max_cb_time(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - return max(nn->nfsd4_lease/10, (time_t)1) * HZ; + + /* + * nfsd4_lease is set to at most one hour in __nfsd4_write_time, + * so we can use 32-bit math on it. Warn if that assumption + * ever stops being true. + */ + if (WARN_ON_ONCE(nn->nfsd4_lease > 3600)) + return 360 * HZ; + + return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ; } static struct workqueue_struct *callback_wq; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2681c70283ce..e12409eca7cc 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* Client gets 2 lease periods to return it */ cutoff = ktime_add_ns(task->tk_start, - nn->nfsd4_lease * NSEC_PER_SEC * 2); + (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2); if (ktime_before(now, cutoff)) { rpc_delay(task, HZ/100); /* 10 mili-seconds */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4798667af647..0e75f7fb5fec 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -37,6 +37,7 @@ #include <linux/falloc.h> #include <linux/slab.h> #include <linux/kthread.h> +#include <linux/sunrpc/addr.h> #include "idmap.h" #include "cache.h" @@ -232,7 +233,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru if (!*resfh) return nfserr_jukebox; fh_init(*resfh, NFS4_FHSIZE); - open->op_truncate = 0; + open->op_truncate = false; if (open->op_create) { /* FIXME: check session persistence and pnfs flags. @@ -365,7 +366,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; - open->op_created = 0; + open->op_created = false; /* * RFC5661 18.51.3 * Before RECLAIM_COMPLETE done, server should deny new lock @@ -503,12 +504,20 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_putfh *putfh = &u->putfh; + __be32 ret; fh_put(&cstate->current_fh); cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); - return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); + ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + if (ret == nfserr_stale && putfh->no_verify) { + SET_FH_FLAG(&cstate->current_fh, NFSD4_FH_FOREIGN); + ret = 0; + } +#endif + return ret; } static __be32 @@ -530,9 +539,9 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_restorefh; fh_dup2(&cstate->current_fh, &cstate->save_fh); - if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) { + if (HAS_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG)) { memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } return nfs_ok; } @@ -542,9 +551,9 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { fh_dup2(&cstate->save_fh, &cstate->current_fh); - if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { + if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG)) { memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG); } return nfs_ok; } @@ -581,9 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_commit *commit = &u->commit; - gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp)); return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, - commit->co_count); + commit->co_count, + (__be32 *)commit->co_verf.data); } static __be32 @@ -776,7 +785,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &read->rd_stateid, RD_STATE, - &read->rd_nf); + &read->rd_nf, NULL); if (status) { dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); goto out; @@ -948,7 +957,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, - WR_STATE, NULL); + WR_STATE, NULL, NULL); if (status) { dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); return status; @@ -975,7 +984,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, - 0, (time_t)0); + 0, (time64_t)0); out: fh_drop_write(&cstate->current_fh); return status; @@ -999,22 +1008,22 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, trace_nfsd_write_start(rqstp, &cstate->current_fh, write->wr_offset, cnt); status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, - stateid, WR_STATE, &nf); + stateid, WR_STATE, &nf, NULL); if (status) { dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); return status; } write->wr_how_written = write->wr_stable_how; - gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp)); nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, &write->wr_head, write->wr_buflen); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); - status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file, + status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, write->wr_offset, rqstp->rq_vec, nvecs, &cnt, - write->wr_how_written); + write->wr_how_written, + (__be32 *)write->wr_verifier.data); nfsd_file_put(nf); write->wr_bytes_written = cnt; @@ -1034,14 +1043,14 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_nofilehandle; status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, - src_stateid, RD_STATE, src); + src_stateid, RD_STATE, src, NULL); if (status) { dprintk("NFSD: %s: couldn't process src stateid!\n", __func__); goto out; } status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, - dst_stateid, WR_STATE, dst); + dst_stateid, WR_STATE, dst, NULL); if (status) { dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__); goto out_put_src; @@ -1076,8 +1085,8 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos, - dst->nf_file, clone->cl_dst_pos, clone->cl_count, + status = nfsd4_clone_file_range(src, clone->cl_src_pos, + dst, clone->cl_dst_pos, clone->cl_count, EX_ISSYNC(cstate->current_fh.fh_export)); nfsd_file_put(dst); @@ -1135,6 +1144,207 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) while ((copy = nfsd4_get_copy(clp)) != NULL) nfsd4_stop_copy(copy); } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + +extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, + nfs4_stateid *stateid); +extern void nfs42_ssc_close(struct file *filep); + +extern void nfs_sb_deactive(struct super_block *sb); + +#define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" + +/** + * Support one copy source server for now. + */ +static __be32 +nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, + struct vfsmount **mount) +{ + struct file_system_type *type; + struct vfsmount *ss_mnt; + struct nfs42_netaddr *naddr; + struct sockaddr_storage tmp_addr; + size_t tmp_addrlen, match_netid_len = 3; + char *startsep = "", *endsep = "", *match_netid = "tcp"; + char *ipaddr, *dev_name, *raw_data; + int len, raw_len; + __be32 status = nfserr_inval; + + naddr = &nss->u.nl4_addr; + tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, + naddr->addr_len, + (struct sockaddr *)&tmp_addr, + sizeof(tmp_addr)); + if (tmp_addrlen == 0) + goto out_err; + + if (tmp_addr.ss_family == AF_INET6) { + startsep = "["; + endsep = "]"; + match_netid = "tcp6"; + match_netid_len = 4; + } + + if (naddr->netid_len != match_netid_len || + strncmp(naddr->netid, match_netid, naddr->netid_len)) + goto out_err; + + /* Construct the raw data for the vfs_kern_mount call */ + len = RPC_MAX_ADDRBUFLEN + 1; + ipaddr = kzalloc(len, GFP_KERNEL); + if (!ipaddr) + goto out_err; + + rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len); + + /* 2 for ipv6 endsep and startsep. 3 for ":/" and trailing '/0'*/ + + raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr); + raw_data = kzalloc(raw_len, GFP_KERNEL); + if (!raw_data) + goto out_free_ipaddr; + + snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr); + + status = nfserr_nodev; + type = get_fs_type("nfs"); + if (!type) + goto out_free_rawdata; + + /* Set the server:<export> for the vfs_kern_mount call */ + dev_name = kzalloc(len + 5, GFP_KERNEL); + if (!dev_name) + goto out_free_rawdata; + snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); + + /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ + ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data); + module_put(type->owner); + if (IS_ERR(ss_mnt)) + goto out_free_devname; + + status = 0; + *mount = ss_mnt; + +out_free_devname: + kfree(dev_name); +out_free_rawdata: + kfree(raw_data); +out_free_ipaddr: + kfree(ipaddr); +out_err: + return status; +} + +static void +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) +{ + nfs_sb_deactive(ss_mnt->mnt_sb); + mntput(ss_mnt); +} + +/** + * nfsd4_setup_inter_ssc + * + * Verify COPY destination stateid. + * Connect to the source server with NFSv4.1. + * Create the source struct file for nfsd_copy_range. + * Called with COPY cstate: + * SAVED_FH: source filehandle + * CURRENT_FH: destination filehandle + */ +static __be32 +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy, struct vfsmount **mount) +{ + struct svc_fh *s_fh = NULL; + stateid_t *s_stid = ©->cp_src_stateid; + __be32 status = nfserr_inval; + + /* Verify the destination stateid and set dst struct file*/ + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + ©->cp_dst_stateid, + WR_STATE, ©->nf_dst, NULL); + if (status) + goto out; + + status = nfsd4_interssc_connect(©->cp_src, rqstp, mount); + if (status) + goto out; + + s_fh = &cstate->save_fh; + + copy->c_fh.size = s_fh->fh_handle.fh_size; + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); + copy->stateid.seqid = cpu_to_be32(s_stid->si_generation); + memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, + sizeof(stateid_opaque_t)); + + status = 0; +out: + return status; +} + +static void +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, + struct nfsd_file *dst) +{ + nfs42_ssc_close(src->nf_file); + nfsd_file_put(src); + nfsd_file_put(dst); + mntput(ss_mnt); +} + +#else /* CONFIG_NFSD_V4_2_INTER_SSC */ + +static __be32 +nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy, + struct vfsmount **mount) +{ + *mount = NULL; + return nfserr_inval; +} + +static void +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, + struct nfsd_file *dst) +{ +} + +static void +nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) +{ +} + +static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt, + struct nfs_fh *src_fh, + nfs4_stateid *stateid) +{ + return NULL; +} +#endif /* CONFIG_NFSD_V4_2_INTER_SSC */ + +static __be32 +nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + struct nfsd4_copy *copy) +{ + return nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, + ©->nf_src, ©->cp_dst_stateid, + ©->nf_dst); +} + +static void +nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) +{ + nfsd_file_put(src); + nfsd_file_put(dst); +} static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { @@ -1200,12 +1410,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) status = nfs_ok; } - nfsd_file_put(copy->nf_src); - nfsd_file_put(copy->nf_dst); + if (!copy->cp_intra) /* Inter server SSC */ + nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, + copy->nf_dst); + else + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); + return status; } -static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) +static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) { dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; @@ -1215,15 +1429,25 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->nf_dst = nfsd_file_get(src->nf_dst); - dst->nf_src = nfsd_file_get(src->nf_src); + dst->cp_intra = src->cp_intra; + if (src->cp_intra) /* for inter, file_src doesn't exist yet */ + dst->nf_src = nfsd_file_get(src->nf_src); + memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); + memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server)); + memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); + memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); + dst->ss_mnt = src->ss_mnt; + + return 0; } static void cleanup_async_copy(struct nfsd4_copy *copy) { - nfs4_free_cp_state(copy); + nfs4_free_copy_state(copy); nfsd_file_put(copy->nf_dst); - nfsd_file_put(copy->nf_src); + if (copy->cp_intra) + nfsd_file_put(copy->nf_src); spin_lock(©->cp_clp->async_lock); list_del(©->copies); spin_unlock(©->cp_clp->async_lock); @@ -1235,7 +1459,24 @@ static int nfsd4_do_async_copy(void *data) struct nfsd4_copy *copy = (struct nfsd4_copy *)data; struct nfsd4_copy *cb_copy; + if (!copy->cp_intra) { /* Inter server SSC */ + copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); + if (!copy->nf_src) { + copy->nfserr = nfserr_serverfault; + nfsd4_interssc_disconnect(copy->ss_mnt); + goto do_callback; + } + copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, + ©->stateid); + if (IS_ERR(copy->nf_src->nf_file)) { + copy->nfserr = nfserr_offload_denied; + nfsd4_interssc_disconnect(copy->ss_mnt); + goto do_callback; + } + } + copy->nfserr = nfsd4_do_copy(copy, 0); +do_callback: cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!cb_copy) goto out; @@ -1247,6 +1488,8 @@ static int nfsd4_do_async_copy(void *data) &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); nfsd4_run_cb(&cb_copy->cp_cb); out: + if (!copy->cp_intra) + kfree(copy->nf_src); cleanup_async_copy(copy); return 0; } @@ -1259,11 +1502,20 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_copy *async_copy = NULL; - status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, - ©->nf_src, ©->cp_dst_stateid, - ©->nf_dst); - if (status) - goto out; + if (!copy->cp_intra) { /* Inter server SSC */ + if (!inter_copy_offload_enable || copy->cp_synchronous) { + status = nfserr_notsupp; + goto out; + } + status = nfsd4_setup_inter_ssc(rqstp, cstate, copy, + ©->ss_mnt); + if (status) + return nfserr_offload_denied; + } else { + status = nfsd4_setup_intra_ssc(rqstp, cstate, copy); + if (status) + return status; + } copy->cp_clp = cstate->clp; memcpy(©->fh, &cstate->current_fh.fh_handle, @@ -1274,15 +1526,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) - goto out; - if (!nfs4_init_cp_state(nn, copy)) { - kfree(async_copy); - goto out; - } + goto out_err; + if (!nfs4_init_copy_state(nn, copy)) + goto out_err; refcount_set(&async_copy->refcount, 1); memcpy(©->cp_res.cb_stateid, ©->cp_stateid, sizeof(copy->cp_stateid)); - dup_copy_fields(copy, async_copy); + status = dup_copy_fields(copy, async_copy); + if (status) + goto out_err; async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) @@ -1293,13 +1545,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_unlock(&async_copy->cp_clp->async_lock); wake_up_process(async_copy->copy_task); status = nfs_ok; - } else + } else { status = nfsd4_do_copy(copy, 1); + } out: return status; out_err: if (async_copy) cleanup_async_copy(async_copy); + status = nfserrno(-ENOMEM); + if (!copy->cp_intra) + nfsd4_interssc_disconnect(copy->ss_mnt); goto out; } @@ -1310,7 +1566,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid) spin_lock(&clp->async_lock); list_for_each_entry(copy, &clp->async_copies, copies) { - if (memcmp(©->cp_stateid, stateid, NFS4_STATEID_SIZE)) + if (memcmp(©->cp_stateid.stid, stateid, NFS4_STATEID_SIZE)) continue; refcount_inc(©->refcount); spin_unlock(&clp->async_lock); @@ -1326,16 +1582,61 @@ nfsd4_offload_cancel(struct svc_rqst *rqstp, union nfsd4_op_u *u) { struct nfsd4_offload_status *os = &u->offload_status; - __be32 status = 0; struct nfsd4_copy *copy; struct nfs4_client *clp = cstate->clp; copy = find_async_copy(clp, &os->stateid); - if (copy) + if (!copy) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + return manage_cpntf_state(nn, &os->stateid, clp, NULL); + } else nfsd4_stop_copy(copy); - else - status = nfserr_bad_stateid; + return nfs_ok; +} + +static __be32 +nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_copy_notify *cn = &u->copy_notify; + __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_stid *stid; + struct nfs4_cpntf_state *cps; + struct nfs4_client *clp = cstate->clp; + + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, + &cn->cpn_src_stateid, RD_STATE, NULL, + &stid); + if (status) + return status; + + cn->cpn_sec = nn->nfsd4_lease; + cn->cpn_nsec = 0; + + status = nfserrno(-ENOMEM); + cps = nfs4_alloc_init_cpntf_state(nn, stid); + if (!cps) + goto out; + memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t)); + memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t)); + memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t)); + + /* For now, only return one server address in cpn_src, the + * address used by the client to connect to this server. + */ + cn->cpn_src.nl4_type = NL4_NETADDR; + status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, + &cn->cpn_src.u.nl4_addr); + WARN_ON_ONCE(status); + if (status) { + nfs4_put_cpntf_state(nn, cps); + goto out; + } +out: + nfs4_put_stid(stid); return status; } @@ -1348,7 +1649,7 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &fallocate->falloc_stateid, - WR_STATE, &nf); + WR_STATE, &nf, NULL); if (status != nfs_ok) { dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n"); return status; @@ -1407,7 +1708,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &seek->seek_stateid, - RD_STATE, &nf); + RD_STATE, &nf, NULL); if (status) { dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n"); return status; @@ -1912,6 +2213,45 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp, - rqstp->rq_auth_slack; } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +static void +check_if_stalefh_allowed(struct nfsd4_compoundargs *args) +{ + struct nfsd4_op *op, *current_op = NULL, *saved_op = NULL; + struct nfsd4_copy *copy; + struct nfsd4_putfh *putfh; + int i; + + /* traverse all operation and if it's a COPY compound, mark the + * source filehandle to skip verification + */ + for (i = 0; i < args->opcnt; i++) { + op = &args->ops[i]; + if (op->opnum == OP_PUTFH) + current_op = op; + else if (op->opnum == OP_SAVEFH) + saved_op = current_op; + else if (op->opnum == OP_RESTOREFH) + current_op = saved_op; + else if (op->opnum == OP_COPY) { + copy = (struct nfsd4_copy *)&op->u; + if (!saved_op) { + op->status = nfserr_nofilehandle; + return; + } + putfh = (struct nfsd4_putfh *)&saved_op->u; + if (!copy->cp_intra) + putfh->no_verify = true; + } + } +} +#else +static void +check_if_stalefh_allowed(struct nfsd4_compoundargs *args) +{ +} +#endif + /* * COMPOUND call. */ @@ -1960,6 +2300,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) resp->opcnt = 1; goto encode_op; } + check_if_stalefh_allowed(args); trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { @@ -1975,13 +2316,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) op->status = nfsd4_open_omfg(rqstp, cstate, op); goto encode_op; } - - if (!current_fh->fh_dentry) { + if (!current_fh->fh_dentry && + !HAS_FH_FLAG(current_fh, NFSD4_FH_FOREIGN)) { if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } - } else if (current_fh->fh_export->ex_fslocs.migrated && + } else if (current_fh->fh_export && + current_fh->fh_export->ex_fslocs.migrated && !(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; @@ -2025,7 +2367,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->opdesc->op_flags & OP_CLEAR_STATEID) clear_current_stateid(cstate); - if (need_wrongsec_check(rqstp)) + if (current_fh->fh_export && + need_wrongsec_check(rqstp)) op->status = check_nfsd_access(current_fh->fh_export, rqstp); } encode_op: @@ -2292,6 +2635,21 @@ static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp, 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32); } +static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 3 /* cnr_lease_time */ + + 1 /* We support one cnr_source_server */ + + 1 /* cnr_stateid seq */ + + op_encode_stateid_maxsz /* cnr_stateid */ + + 1 /* num cnr_source_server*/ + + 1 /* nl4_type */ + + 1 /* nl4 size */ + + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz */) + * sizeof(__be32); +} + #ifdef CONFIG_NFSD_PNFS static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { @@ -2716,6 +3074,12 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_OFFLOAD_CANCEL", .op_rsize_bop = nfsd4_only_status_rsize, }, + [OP_COPY_NOTIFY] = { + .op_func = nfsd4_copy_notify, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_COPY_NOTIFY", + .op_rsize_bop = nfsd4_copy_notify_rsize, + }, }; /** diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 2481e7662128..a8fb18609146 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1445,7 +1445,7 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn) } cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; - cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time; + cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time; ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); if (!ret) ret = cup->cu_u.cu_msg.cm_status; @@ -1782,7 +1782,7 @@ nfsd4_cltrack_client_has_session(struct nfs4_client *clp) } static char * -nfsd4_cltrack_grace_start(time_t grace_start) +nfsd4_cltrack_grace_start(time64_t grace_start) { int copied; size_t len; @@ -1795,7 +1795,7 @@ nfsd4_cltrack_grace_start(time_t grace_start) if (!result) return result; - copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld", + copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld", grace_start); if (copied >= len) { /* just return nothing if output was truncated */ @@ -2004,7 +2004,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn) char *legacy; char timestr[22]; /* FIXME: better way to determine max size? */ - sprintf(timestr, "%ld", nn->boot_time); + sprintf(timestr, "%lld", nn->boot_time); legacy = nfsd4_cltrack_legacy_topdir(); nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL); kfree(legacy); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 369e574c5092..65cfe9ab47be 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -80,6 +80,7 @@ static u64 current_sessionid = 1; static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); static void nfs4_free_ol_stateid(struct nfs4_stid *stid); void nfsd4_end_grace(struct nfsd_net *nn); +static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); /* Locking: */ @@ -170,7 +171,7 @@ renew_client_locked(struct nfs4_client *clp) clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); list_move_tail(&clp->cl_lru, &nn->client_lru); - clp->cl_time = get_seconds(); + clp->cl_time = ktime_get_boottime_seconds(); } static void put_client_renew_locked(struct nfs4_client *clp) @@ -722,6 +723,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla /* Will be incremented before return to client: */ refcount_set(&stid->sc_count, 1); spin_lock_init(&stid->sc_lock); + INIT_LIST_HEAD(&stid->sc_cp_list); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -741,30 +743,76 @@ out_free: /* * Create a unique stateid_t to represent each COPY. */ -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy) +static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, + unsigned char sc_type) { int new_id; + stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; + stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + stid->sc_type = sc_type; + idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); - new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT); + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); + stid->stid.si_opaque.so_id = new_id; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) return 0; - copy->cp_stateid.si_opaque.so_id = new_id; - copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; - copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; return 1; } -void nfs4_free_cp_state(struct nfsd4_copy *copy) +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy) +{ + return nfs4_init_cp_state(nn, ©->cp_stateid, NFS4_COPY_STID); +} + +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, + struct nfs4_stid *p_stid) +{ + struct nfs4_cpntf_state *cps; + + cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL); + if (!cps) + return NULL; + cps->cpntf_time = ktime_get_boottime_seconds(); + refcount_set(&cps->cp_stateid.sc_count, 1); + if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) + goto out_free; + spin_lock(&nn->s2s_cp_lock); + list_add(&cps->cp_list, &p_stid->sc_cp_list); + spin_unlock(&nn->s2s_cp_lock); + return cps; +out_free: + kfree(cps); + return NULL; +} + +void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; + WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID); nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); - idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id); + idr_remove(&nn->s2s_cp_stateids, + copy->cp_stateid.stid.si_opaque.so_id); + spin_unlock(&nn->s2s_cp_lock); +} + +static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid) +{ + struct nfs4_cpntf_state *cps; + struct nfsd_net *nn; + + nn = net_generic(net, nfsd_net_id); + spin_lock(&nn->s2s_cp_lock); + while (!list_empty(&stid->sc_cp_list)) { + cps = list_first_entry(&stid->sc_cp_list, + struct nfs4_cpntf_state, cp_list); + _free_cpntf_state_locked(nn, cps); + } spin_unlock(&nn->s2s_cp_lock); } @@ -806,7 +854,7 @@ static void nfs4_free_deleg(struct nfs4_stid *stid) static DEFINE_SPINLOCK(blocked_delegations_lock); static struct bloom_pair { int entries, old_entries; - time_t swap_time; + time64_t swap_time; int new; /* index into 'set' */ DECLARE_BITMAP(set[2], 256); } blocked_delegations; @@ -818,15 +866,15 @@ static int delegation_blocked(struct knfsd_fh *fh) if (bd->entries == 0) return 0; - if (seconds_since_boot() - bd->swap_time > 30) { + if (ktime_get_seconds() - bd->swap_time > 30) { spin_lock(&blocked_delegations_lock); - if (seconds_since_boot() - bd->swap_time > 30) { + if (ktime_get_seconds() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; memset(bd->set[bd->new], 0, sizeof(bd->set[0])); bd->new = 1-bd->new; - bd->swap_time = seconds_since_boot(); + bd->swap_time = ktime_get_seconds(); } spin_unlock(&blocked_delegations_lock); } @@ -856,7 +904,7 @@ static void block_delegations(struct knfsd_fh *fh) __set_bit((hash>>8)&255, bd->set[bd->new]); __set_bit((hash>>16)&255, bd->set[bd->new]); if (bd->entries == 0) - bd->swap_time = seconds_since_boot(); + bd->swap_time = ktime_get_seconds(); bd->entries += 1; spin_unlock(&blocked_delegations_lock); } @@ -915,6 +963,7 @@ nfs4_put_stid(struct nfs4_stid *s) return; } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + nfs4_free_cpntf_statelist(clp->net, s); spin_unlock(&clp->cl_lock); s->sc_free(s); if (fp) @@ -1862,7 +1911,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) */ if (clid->cl_boot == (u32)nn->boot_time) return 0; - dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n", + dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n", clid->cl_boot, clid->cl_id, nn->boot_time); return 1; } @@ -2215,14 +2264,14 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) * This is opaque to client, so no need to byte-swap. Use * __force to keep sparse happy */ - verf[0] = (__force __be32)get_seconds(); + verf[0] = (__force __be32)(u32)ktime_get_real_seconds(); verf[1] = (__force __be32)nn->clverifier_counter++; memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { - clp->cl_clientid.cl_boot = nn->boot_time; + clp->cl_clientid.cl_boot = (u32)nn->boot_time; clp->cl_clientid.cl_id = nn->clientid_counter++; gen_confirm(clp, nn); } @@ -2292,7 +2341,7 @@ static int client_info_show(struct seq_file *m, void *v) clp->cl_nii_domain.len); seq_printf(m, "\nImplementation name: "); seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len); - seq_printf(m, "\nImplementation time: [%ld, %ld]\n", + seq_printf(m, "\nImplementation time: [%lld, %ld]\n", clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); } drop_client(clp); @@ -2612,7 +2661,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, gen_clid(clp, nn); kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); - clp->cl_time = get_seconds(); + clp->cl_time = ktime_get_boottime_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); @@ -2946,8 +2995,7 @@ static __be32 copy_impl_id(struct nfs4_client *clp, xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL); if (!clp->cl_nii_name.data) return nfserr_jukebox; - clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec; - clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec; + clp->cl_nii_time = exid->nii_time; return 0; } @@ -3373,7 +3421,7 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir) case NFS4_CDFC4_BACK_OR_BOTH: *dir = NFS4_CDFC4_BOTH; return nfs_ok; - }; + } return nfserr_inval; } @@ -4283,7 +4331,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) last = oo->oo_last_closed_stid; oo->oo_last_closed_stid = s; list_move_tail(&oo->oo_close_lru, &nn->close_lru); - oo->oo_time = get_seconds(); + oo->oo_time = ktime_get_boottime_seconds(); spin_unlock(&nn->client_lock); if (last) nfs4_put_stid(&last->st_stid); @@ -4378,7 +4426,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) */ spin_lock(&state_lock); if (dp->dl_time == 0) { - dp->dl_time = get_seconds(); + dp->dl_time = ktime_get_boottime_seconds(); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); } spin_unlock(&state_lock); @@ -4490,7 +4538,8 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 static __be32 lookup_clientid(clientid_t *clid, struct nfsd4_compound_state *cstate, - struct nfsd_net *nn) + struct nfsd_net *nn, + bool sessions) { struct nfs4_client *found; @@ -4511,7 +4560,7 @@ static __be32 lookup_clientid(clientid_t *clid, */ WARN_ON_ONCE(cstate->session); spin_lock(&nn->client_lock); - found = find_confirmed_client(clid, false, nn); + found = find_confirmed_client(clid, sessions, nn); if (!found) { spin_unlock(&nn->client_lock); return nfserr_expired; @@ -4544,7 +4593,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - status = lookup_clientid(clientid, cstate, nn); + status = lookup_clientid(clientid, cstate, nn, false); if (status) return status; clp = cstate->clp; @@ -4672,7 +4721,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); + return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, @@ -5133,7 +5182,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) goto out; clp = cstate->clp; @@ -5184,9 +5233,8 @@ nfsd4_end_grace(struct nfsd_net *nn) */ static bool clients_still_reclaiming(struct nfsd_net *nn) { - unsigned long now = get_seconds(); - unsigned long double_grace_period_end = nn->boot_time + - 2 * nn->nfsd4_lease; + time64_t double_grace_period_end = nn->boot_time + + 2 * nn->nfsd4_lease; if (nn->track_reclaim_completes && atomic_read(&nn->nr_reclaim_complete) == @@ -5199,12 +5247,12 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) * If we've given them *two* lease times to reclaim, and they're * still not done, give up: */ - if (time_after(now, double_grace_period_end)) + if (ktime_get_boottime_seconds() > double_grace_period_end) return false; return true; } -static time_t +static time64_t nfs4_laundromat(struct nfsd_net *nn) { struct nfs4_client *clp; @@ -5213,8 +5261,11 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; - time_t cutoff = get_seconds() - nn->nfsd4_lease; - time_t t, new_timeo = nn->nfsd4_lease; + time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease; + time64_t t, new_timeo = nn->nfsd4_lease; + struct nfs4_cpntf_state *cps; + copy_stateid_t *cps_t; + int i; dprintk("NFSD: laundromat service - starting\n"); @@ -5225,10 +5276,20 @@ nfs4_laundromat(struct nfsd_net *nn) dprintk("NFSD: end of grace period\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); + + spin_lock(&nn->s2s_cp_lock); + idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { + cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); + if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && + cps->cpntf_time > cutoff) + _free_cpntf_state_locked(nn, cps); + } + spin_unlock(&nn->s2s_cp_lock); + spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { + if (clp->cl_time > cutoff) { t = clp->cl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5251,7 +5312,7 @@ nfs4_laundromat(struct nfsd_net *nn) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { + if (dp->dl_time > cutoff) { t = dp->dl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5271,8 +5332,7 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); - if (time_after((unsigned long)oo->oo_time, - (unsigned long)cutoff)) { + if (oo->oo_time > cutoff) { t = oo->oo_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5302,8 +5362,7 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); - if (time_after((unsigned long)nbl->nbl_time, - (unsigned long)cutoff)) { + if (nbl->nbl_time > cutoff) { t = nbl->nbl_time - cutoff; new_timeo = min(new_timeo, t); break; @@ -5320,7 +5379,7 @@ nfs4_laundromat(struct nfsd_net *nn) free_blocked_lock(nbl); } out: - new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); + new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); return new_timeo; } @@ -5330,13 +5389,13 @@ static void laundromat_main(struct work_struct *); static void laundromat_main(struct work_struct *laundry) { - time_t t; + time64_t t; struct delayed_work *dwork = to_delayed_work(laundry); struct nfsd_net *nn = container_of(dwork, struct nfsd_net, laundromat_work); t = nfs4_laundromat(nn); - dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); + dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t); queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } @@ -5521,7 +5580,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return nfserr_bad_stateid; - status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); + status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn, + false); if (status == nfserr_stale_clientid) { if (cstate->session) return nfserr_bad_stateid; @@ -5600,6 +5660,85 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, out: return status; } +static void +_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) +{ + WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID); + if (!refcount_dec_and_test(&cps->cp_stateid.sc_count)) + return; + list_del(&cps->cp_list); + idr_remove(&nn->s2s_cp_stateids, + cps->cp_stateid.stid.si_opaque.so_id); + kfree(cps); +} +/* + * A READ from an inter server to server COPY will have a + * copy stateid. Look up the copy notify stateid from the + * idr structure and take a reference on it. + */ +__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_client *clp, + struct nfs4_cpntf_state **cps) +{ + copy_stateid_t *cps_t; + struct nfs4_cpntf_state *state = NULL; + + if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id) + return nfserr_bad_stateid; + spin_lock(&nn->s2s_cp_lock); + cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id); + if (cps_t) { + state = container_of(cps_t, struct nfs4_cpntf_state, + cp_stateid); + if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) { + state = NULL; + goto unlock; + } + if (!clp) + refcount_inc(&state->cp_stateid.sc_count); + else + _free_cpntf_state_locked(nn, state); + } +unlock: + spin_unlock(&nn->s2s_cp_lock); + if (!state) + return nfserr_bad_stateid; + if (!clp && state) + *cps = state; + return 0; +} + +static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_stid **stid) +{ + __be32 status; + struct nfs4_cpntf_state *cps = NULL; + struct nfsd4_compound_state cstate; + + status = manage_cpntf_state(nn, st, NULL, &cps); + if (status) + return status; + + cps->cpntf_time = ktime_get_boottime_seconds(); + memset(&cstate, 0, sizeof(cstate)); + status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true); + if (status) + goto out; + status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid, + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + stid, nn); + put_client_renew(cstate.clp); +out: + nfs4_put_cpntf_state(nn, cps); + return status; +} + +void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) +{ + spin_lock(&nn->s2s_cp_lock); + _free_cpntf_state_locked(nn, cps); + spin_unlock(&nn->s2s_cp_lock); +} /* * Checks for stateid operations @@ -5607,7 +5746,8 @@ out: __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, - stateid_t *stateid, int flags, struct nfsd_file **nfp) + stateid_t *stateid, int flags, struct nfsd_file **nfp, + struct nfs4_stid **cstid) { struct inode *ino = d_inode(fhp->fh_dentry); struct net *net = SVC_NET(rqstp); @@ -5629,6 +5769,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); + if (status == nfserr_bad_stateid) + status = find_cpntf_state(nn, stateid, &s); if (status) return status; status = nfsd4_stid_check_stateid_generation(stateid, s, @@ -5656,8 +5798,12 @@ done: if (status == nfs_ok && nfp) status = nfs4_check_file(rqstp, fhp, s, nfp, flags); out: - if (s) - nfs4_put_stid(s); + if (s) { + if (!status && cstid) + *cstid = s; + else + nfs4_put_stid(s); + } return status; } @@ -6550,7 +6696,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = jiffies; + nbl->nbl_time = ktime_get_boottime_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); @@ -6657,7 +6803,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; if (!nfsd4_has_session(cstate)) { - status = lookup_clientid(&lockt->lt_clientid, cstate, nn); + status = lookup_clientid(&lockt->lt_clientid, cstate, nn, + false); if (status) goto out; } @@ -6841,7 +6988,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) return status; @@ -6988,7 +7135,7 @@ nfs4_check_open_reclaim(clientid_t *clid, __be32 status; /* find clientid in conf_id_hashtbl */ - status = lookup_clientid(clid, cstate, nn); + status = lookup_clientid(clid, cstate, nn, false); if (status) return nfserr_reclaim_bad; @@ -7641,7 +7788,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; nn->unconf_name_tree = RB_ROOT; - nn->boot_time = get_seconds(); + nn->boot_time = ktime_get_real_seconds(); nn->grace_ended = false; nn->nfsd4_manager.block_opens = true; INIT_LIST_HEAD(&nn->nfsd4_manager.list); @@ -7710,7 +7857,7 @@ nfs4_state_start_net(struct net *net) nfsd4_client_tracking_init(net); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) goto skip_grace; - printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n", + printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; @@ -7786,7 +7933,8 @@ nfs4_state_shutdown(void) static void get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { - if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid)) + if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) && + CURRENT_STATEID(stateid)) memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t)); } @@ -7795,14 +7943,14 @@ put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) { if (cstate->minorversion) { memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t)); - SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } } void clear_current_stateid(struct nfsd4_compound_state *cstate) { - CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG); } /* diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index d2dc4c0e22e8..9761512674a0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -40,6 +40,7 @@ #include <linux/utsname.h> #include <linux/pagemap.h> #include <linux/sunrpc/svcauth_gss.h> +#include <linux/sunrpc/addr.h> #include "idmap.h" #include "acl.h" @@ -1744,10 +1745,47 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) DECODE_TAIL; } +static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, + struct nl4_server *ns) +{ + DECODE_HEAD; + struct nfs42_netaddr *naddr; + + READ_BUF(4); + ns->nl4_type = be32_to_cpup(p++); + + /* currently support for 1 inter-server source server */ + switch (ns->nl4_type) { + case NL4_NETADDR: + naddr = &ns->u.nl4_addr; + + READ_BUF(4); + naddr->netid_len = be32_to_cpup(p++); + if (naddr->netid_len > RPCBIND_MAXNETIDLEN) + goto xdr_error; + + READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */ + COPYMEM(naddr->netid, naddr->netid_len); + + naddr->addr_len = be32_to_cpup(p++); + if (naddr->addr_len > RPCBIND_MAXUADDRLEN) + goto xdr_error; + + READ_BUF(naddr->addr_len); + COPYMEM(naddr->addr, naddr->addr_len); + break; + default: + goto xdr_error; + } + DECODE_TAIL; +} + static __be32 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) { DECODE_HEAD; + struct nl4_server *ns_dummy; + int i, count; status = nfsd4_decode_stateid(argp, ©->cp_src_stateid); if (status) @@ -1762,7 +1800,32 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) p = xdr_decode_hyper(p, ©->cp_count); p++; /* ca_consecutive: we always do consecutive copies */ copy->cp_synchronous = be32_to_cpup(p++); - /* tmp = be32_to_cpup(p); Source server list not supported */ + + count = be32_to_cpup(p++); + + copy->cp_intra = false; + if (count == 0) { /* intra-server copy */ + copy->cp_intra = true; + goto intra; + } + + /* decode all the supplied server addresses but use first */ + status = nfsd4_decode_nl4_server(argp, ©->cp_src); + if (status) + return status; + + ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); + if (ns_dummy == NULL) + return nfserrno(-ENOMEM); + for (i = 0; i < count - 1; i++) { + status = nfsd4_decode_nl4_server(argp, ns_dummy); + if (status) { + kfree(ns_dummy); + return status; + } + } + kfree(ns_dummy); +intra: DECODE_TAIL; } @@ -1775,6 +1838,18 @@ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, } static __be32 +nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, + struct nfsd4_copy_notify *cn) +{ + int status; + + status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid); + if (status) + return status; + return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); +} + +static __be32 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { DECODE_HEAD; @@ -1875,7 +1950,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { /* new operations for NFSv4.2 */ [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy, - [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify, [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate, [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, @@ -2024,11 +2099,11 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, */ static __be32 *encode_time_delta(__be32 *p, struct inode *inode) { - struct timespec ts; + struct timespec64 ts; u32 ns; ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran); - ts = ns_to_timespec(ns); + ts = ns_to_timespec64(ns); p = xdr_encode_hyper(p, ts.tv_sec); *p++ = cpu_to_be32(ts.tv_nsec); @@ -4244,6 +4319,46 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, } static __be32 +nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) +{ + struct xdr_stream *xdr = &resp->xdr; + struct nfs42_netaddr *addr; + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + *p++ = cpu_to_be32(ns->nl4_type); + + switch (ns->nl4_type) { + case NL4_NETADDR: + addr = &ns->u.nl4_addr; + + /* netid_len, netid, uaddr_len, uaddr (port included + * in RPCBIND_MAXUADDRLEN) + */ + p = xdr_reserve_space(xdr, + 4 /* netid len */ + + (XDR_QUADLEN(addr->netid_len) * 4) + + 4 /* uaddr len */ + + (XDR_QUADLEN(addr->addr_len) * 4)); + if (!p) + return nfserr_resource; + + *p++ = cpu_to_be32(addr->netid_len); + p = xdr_encode_opaque_fixed(p, addr->netid, + addr->netid_len); + *p++ = cpu_to_be32(addr->addr_len); + p = xdr_encode_opaque_fixed(p, addr->addr, + addr->addr_len); + break; + default: + WARN_ON_ONCE(ns->nl4_type != NL4_NETADDR); + return nfserr_inval; + } + + return 0; +} + +static __be32 nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_copy *copy) { @@ -4277,6 +4392,40 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, } static __be32 +nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_copy_notify *cn) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + + if (nfserr) + return nfserr; + + /* 8 sec, 4 nsec */ + p = xdr_reserve_space(xdr, 12); + if (!p) + return nfserr_resource; + + /* cnr_lease_time */ + p = xdr_encode_hyper(p, cn->cpn_sec); + *p++ = cpu_to_be32(cn->cpn_nsec); + + /* cnr_stateid */ + nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid); + if (nfserr) + return nfserr; + + /* cnr_src.nl_nsvr */ + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + + *p++ = cpu_to_be32(1); + + return nfsd42_encode_nl4_server(resp, &cn->cpn_src); +} + +static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_seek *seek) { @@ -4373,7 +4522,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { /* NFSv4.2 operations */ [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy, - [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop, + [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify, [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop, [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop, [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, @@ -4500,8 +4649,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) __be32 *p; struct nfs4_replay *rp = op->replay; - BUG_ON(!rp); - p = xdr_reserve_space(xdr, 8 + rp->rp_buflen); if (!p) { WARN_ON_ONCE(1); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7eb919f1b13f..e109a1007704 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -956,7 +956,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size) #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, - time_t *time, struct nfsd_net *nn) + time64_t *time, struct nfsd_net *nn) { char *mesg = buf; int rv, i; @@ -984,11 +984,11 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, *time = i; } - return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time); + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time); } static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, - time_t *time, struct nfsd_net *nn) + time64_t *time, struct nfsd_net *nn) { ssize_t rv; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 57b93d95fa5c..2ab5569126b8 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -19,6 +19,7 @@ #include <linux/sunrpc/svc.h> #include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/msg_prot.h> +#include <linux/sunrpc/addr.h> #include <uapi/linux/nfsd/debug.h> @@ -142,7 +143,6 @@ int nfs4_state_start(void); int nfs4_state_start_net(struct net *net); void nfs4_state_shutdown(void); void nfs4_state_shutdown_net(struct net *net); -void nfs4_reset_lease(time_t leasetime); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); bool nfsd4_spo_must_allow(struct svc_rqst *rqstp); @@ -153,7 +153,6 @@ static inline int nfs4_state_start(void) { return 0; } static inline int nfs4_state_start_net(struct net *net) { return 0; } static inline void nfs4_state_shutdown(void) { } static inline void nfs4_state_shutdown_net(struct net *net) { } -static inline void nfs4_reset_lease(time_t leasetime) { } static inline int nfs4_reset_recoverydir(char *recdir) { return 0; } static inline char * nfs4_recoverydir(void) {return NULL; } static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) @@ -387,6 +386,37 @@ void nfsd_lockd_shutdown(void); extern const u32 nfsd_suppattrs[3][3]; +static inline __be32 nfsd4_set_netaddr(struct sockaddr *addr, + struct nfs42_netaddr *netaddr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)addr; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; + unsigned int port; + size_t ret_addr, ret_port; + + switch (addr->sa_family) { + case AF_INET: + port = ntohs(sin->sin_port); + sprintf(netaddr->netid, "tcp"); + netaddr->netid_len = 3; + break; + case AF_INET6: + port = ntohs(sin6->sin6_port); + sprintf(netaddr->netid, "tcp6"); + netaddr->netid_len = 4; + break; + default: + return nfserr_inval; + } + ret_addr = rpc_ntop(addr, netaddr->addr, sizeof(netaddr->addr)); + ret_port = snprintf(netaddr->addr + ret_addr, + RPCBIND_MAXUADDRLEN + 1 - ret_addr, + ".%u.%u", port >> 8, port & 0xff); + WARN_ON(ret_port >= RPCBIND_MAXUADDRLEN + 1 - ret_addr); + netaddr->addr_len = ret_addr + ret_port; + return 0; +} + static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2) { return !((bm1[0] & ~bm2[0]) || diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 755e256a9103..56cfbc361561 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -35,15 +35,15 @@ typedef struct svc_fh { bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ - + int fh_flags; /* FH flags */ #ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ /* Pre-op attributes saved during fh_lock */ __u64 fh_pre_size; /* size before operation */ - struct timespec fh_pre_mtime; /* mtime before oper */ - struct timespec fh_pre_ctime; /* ctime before oper */ + struct timespec64 fh_pre_mtime; /* mtime before oper */ + struct timespec64 fh_pre_ctime; /* ctime before oper */ /* * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode) * to find out if it is valid. @@ -56,6 +56,9 @@ typedef struct svc_fh { #endif /* CONFIG_NFSD_V3 */ } svc_fh; +#define NFSD4_FH_FOREIGN (1<<0) +#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f)) +#define HAS_FH_FLAG(c, f) ((c)->fh_flags & (f)) enum nfsd_fsid { FSID_DEV = 0, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c83ddac22f38..543bbe0a556e 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -94,7 +94,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) * Solaris, at least, doesn't seem to care what the time * request is. We require it be within 30 minutes of now. */ - time_t delta = iap->ia_atime.tv_sec - get_seconds(); + time64_t delta = iap->ia_atime.tv_sec - ktime_get_real_seconds(); nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); if (nfserr) @@ -113,7 +113,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0); + nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0); done: return nfsd_return_attrs(nfserr, resp); } @@ -226,7 +226,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) return nfserr_io; nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), argp->offset, rqstp->rq_vec, nvecs, - &cnt, NFS_DATA_SYNC); + &cnt, NFS_DATA_SYNC, NULL); return nfsd_return_attrs(nfserr, resp); } @@ -380,7 +380,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0); + nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time64_t)0); } out_unlock: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e8bee8ff30c5..3b77b904212d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -31,6 +31,12 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC +bool inter_copy_offload_enable; +EXPORT_SYMBOL_GPL(inter_copy_offload_enable); +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. Default: false"); + extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) @@ -391,20 +397,25 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre ret = lockd_up(net, cred); if (ret) goto out_socks; - nn->lockd_up = 1; + nn->lockd_up = true; } - ret = nfs4_state_start_net(net); + ret = nfsd_file_cache_start_net(net); if (ret) goto out_lockd; + ret = nfs4_state_start_net(net); + if (ret) + goto out_filecache; nn->nfsd_net_up = true; return 0; +out_filecache: + nfsd_file_cache_shutdown_net(net); out_lockd: if (nn->lockd_up) { lockd_down(net); - nn->lockd_up = 0; + nn->lockd_up = false; } out_socks: nfsd_shutdown_generic(); @@ -415,11 +426,11 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_file_cache_purge(net); + nfsd_file_cache_shutdown_net(net); nfs4_state_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); - nn->lockd_up = 0; + nn->lockd_up = false; } nn->nfsd_net_up = false; nfsd_shutdown_generic(); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index d61b83b9654c..68d3f30ee760 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -56,6 +56,14 @@ typedef struct { stateid_opaque_t si_opaque; } stateid_t; +typedef struct { + stateid_t stid; +#define NFS4_COPY_STID 1 +#define NFS4_COPYNOTIFY_STID 2 + unsigned char sc_type; + refcount_t sc_count; +} copy_stateid_t; + #define STATEID_FMT "(%08x/%08x/%08x/%08x)" #define STATEID_VAL(s) \ (s)->si_opaque.so_clid.cl_boot, \ @@ -96,6 +104,7 @@ struct nfs4_stid { #define NFS4_REVOKED_DELEG_STID 16 #define NFS4_CLOSED_DELEG_STID 32 #define NFS4_LAYOUT_STID 64 + struct list_head sc_cp_list; unsigned char sc_type; stateid_t sc_stateid; spinlock_t sc_lock; @@ -104,6 +113,17 @@ struct nfs4_stid { void (*sc_free)(struct nfs4_stid *); }; +/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the + * parent OPEN/LOCK/DELEG stateid. + */ +struct nfs4_cpntf_state { + copy_stateid_t cp_stateid; + struct list_head cp_list; /* per parent nfs4_stid */ + stateid_t cp_p_stateid; /* copy of parent's stateid */ + clientid_t cp_p_clid; /* copy of parent's clid */ + time64_t cpntf_time; /* last time stateid used */ +}; + /* * Represents a delegation stateid. The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is @@ -132,7 +152,7 @@ struct nfs4_delegation { struct list_head dl_recall_lru; /* delegation recalled */ struct nfs4_clnt_odstate *dl_clnt_odstate; u32 dl_type; - time_t dl_time; + time64_t dl_time; /* For recall: */ int dl_retries; struct nfsd4_callback dl_recall; @@ -310,7 +330,7 @@ struct nfs4_client { #endif struct xdr_netobj cl_name; /* id generated by client */ nfs4_verifier cl_verifier; /* generated by client */ - time_t cl_time; /* time of last lease renewal */ + time64_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ bool cl_mach_cred; /* SP4_MACH_CRED in force */ struct svc_cred cl_cred; /* setclientid principal */ @@ -320,7 +340,7 @@ struct nfs4_client { /* NFSv4.1 client implementation id: */ struct xdr_netobj cl_nii_domain; struct xdr_netobj cl_nii_name; - struct timespec cl_nii_time; + struct timespec64 cl_nii_time; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; @@ -449,7 +469,7 @@ struct nfs4_openowner { */ struct list_head oo_close_lru; struct nfs4_ol_stateid *oo_last_closed_stid; - time_t oo_time; /* time of placement on so_close_lru */ + time64_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 unsigned char oo_flags; }; @@ -606,7 +626,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head nbl_lru; - unsigned long nbl_time; + time64_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; @@ -618,14 +638,17 @@ struct nfsd4_copy; extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, - stateid_t *stateid, int flags, struct nfsd_file **filp); + stateid_t *stateid, int flags, struct nfsd_file **filp, + struct nfs4_stid **cstid); __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn); struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, void (*sc_free)(struct nfs4_stid *)); -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy); -void nfs4_free_cp_state(struct nfsd4_copy *copy); +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy); +void nfs4_free_copy_state(struct nfsd4_copy *copy); +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, + struct nfs4_stid *p_stid); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); @@ -655,6 +678,11 @@ void put_nfs4_file(struct nfs4_file *fi); extern void nfs4_put_copy(struct nfsd4_copy *copy); extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); +extern void nfs4_put_cpntf_state(struct nfsd_net *nn, + struct nfs4_cpntf_state *cps); +extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, + struct nfs4_client *clp, + struct nfs4_cpntf_state **cps); static inline void get_nfs4_file(struct nfs4_file *fi) { refcount_inc(&fi->fi_ref); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index ffc78a0e28b2..06dd0d337049 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -166,6 +166,12 @@ DEFINE_STATEID_EVENT(layout_recall_done); DEFINE_STATEID_EVENT(layout_recall_fail); DEFINE_STATEID_EVENT(layout_recall_release); +TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); +TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); +TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); +TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE); +TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED); + #define show_nf_flags(val) \ __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ @@ -195,7 +201,7 @@ DECLARE_EVENT_CLASS(nfsd_file_class, TP_fast_assign( __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; - __entry->nf_ref = atomic_read(&nf->nf_ref); + __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; @@ -228,7 +234,7 @@ TRACE_EVENT(nfsd_file_acquire, TP_ARGS(rqstp, hash, inode, may_flags, nf, status), TP_STRUCT__entry( - __field(__be32, xid) + __field(u32, xid) __field(unsigned int, hash) __field(void *, inode) __field(unsigned int, may_flags) @@ -236,27 +242,27 @@ TRACE_EVENT(nfsd_file_acquire, __field(unsigned long, nf_flags) __field(unsigned char, nf_may) __field(struct file *, nf_file) - __field(__be32, status) + __field(u32, status) ), TP_fast_assign( - __entry->xid = rqstp->rq_xid; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; - __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0; + __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; __entry->nf_flags = nf ? nf->nf_flags : 0; __entry->nf_may = nf ? nf->nf_may : 0; __entry->nf_file = nf ? nf->nf_file : NULL; - __entry->status = status; + __entry->status = be32_to_cpu(status); ), TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", - be32_to_cpu(__entry->xid), __entry->hash, __entry->inode, + __entry->xid, __entry->hash, __entry->inode, show_nf_may(__entry->may_flags), __entry->nf_ref, show_nf_flags(__entry->nf_flags), show_nf_may(__entry->nf_may), __entry->nf_file, - be32_to_cpu(__entry->status)) + __entry->status) ); DECLARE_EVENT_CLASS(nfsd_file_search_class, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c0dc491537a6..0aa02eb18bd3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -280,19 +280,25 @@ out: * Commit metadata changes to stable storage. */ static int -commit_metadata(struct svc_fh *fhp) +commit_inode_metadata(struct inode *inode) { - struct inode *inode = d_inode(fhp->fh_dentry); const struct export_operations *export_ops = inode->i_sb->s_export_op; - if (!EX_ISSYNC(fhp->fh_export)) - return 0; - if (export_ops->commit_metadata) return export_ops->commit_metadata(inode); return sync_inode_metadata(inode, 1); } +static int +commit_metadata(struct svc_fh *fhp) +{ + struct inode *inode = d_inode(fhp->fh_dentry); + + if (!EX_ISSYNC(fhp->fh_export)) + return 0; + return commit_inode_metadata(inode); +} + /* * Go over the attributes and take care of the small differences between * NFS semantics and what Linux expects. @@ -358,7 +364,7 @@ out_nfserrno: */ __be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) + int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; @@ -524,23 +530,39 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif -__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, - u64 dst_pos, u64 count, bool sync) +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) { + struct file *src = nf_src->nf_file; + struct file *dst = nf_dst->nf_file; loff_t cloned; + __be32 ret = 0; + down_write(&nf_dst->nf_rwsem); cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); - if (cloned < 0) - return nfserrno(cloned); - if (count && cloned != count) - return nfserrno(-EINVAL); + if (cloned < 0) { + ret = nfserrno(cloned); + goto out_err; + } + if (count && cloned != count) { + ret = nfserrno(-EINVAL); + goto out_err; + } if (sync) { loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX; int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); - if (status < 0) - return nfserrno(status); + + if (!status) + status = commit_inode_metadata(file_inode(src)); + if (status < 0) { + nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net, + nfsd_net_id)); + ret = nfserrno(status); + } } - return 0; +out_err: + up_write(&nf_dst->nf_rwsem); + return ret; } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, @@ -938,10 +960,12 @@ static int wait_for_concurrent_writes(struct file *file) } __be32 -nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int stable) + unsigned long *cnt, int stable, + __be32 *verf) { + struct file *file = nf->nf_file; struct svc_export *exp; struct iov_iter iter; __be32 nfserr; @@ -972,9 +996,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, flags |= RWF_SYNC; iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); - host_err = vfs_iter_write(file, &iter, &pos, flags); - if (host_err < 0) + if (flags & RWF_SYNC) { + down_write(&nf->nf_rwsem); + host_err = vfs_iter_write(file, &iter, &pos, flags); + if (host_err < 0) + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); + up_write(&nf->nf_rwsem); + } else { + down_read(&nf->nf_rwsem); + if (verf) + nfsd_copy_boot_verifier(verf, + net_generic(SVC_NET(rqstp), + nfsd_net_id)); + host_err = vfs_iter_write(file, &iter, &pos, flags); + up_read(&nf->nf_rwsem); + } + if (host_err < 0) { + nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), + nfsd_net_id)); goto out_nfserr; + } + *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); @@ -1036,7 +1079,8 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, int stable) + struct kvec *vec, int vlen, unsigned long *cnt, int stable, + __be32 *verf) { struct nfsd_file *nf; __be32 err; @@ -1047,8 +1091,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, if (err) goto out; - err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec, - vlen, cnt, stable); + err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, + vlen, cnt, stable, verf); nfsd_file_put(nf); out: trace_nfsd_write_done(rqstp, fhp, offset, *cnt); @@ -1067,7 +1111,7 @@ out: */ __be32 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, unsigned long count) + loff_t offset, unsigned long count, __be32 *verf) { struct nfsd_file *nf; loff_t end = LLONG_MAX; @@ -1086,10 +1130,14 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; if (EX_ISSYNC(fhp->fh_export)) { - int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); + int err2; + down_write(&nf->nf_rwsem); + err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); switch (err2) { case 0: + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); break; case -EINVAL: err = nfserr_notsupp; @@ -1099,7 +1147,10 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); } - } + up_write(&nf->nf_rwsem); + } else + nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, + nfsd_net_id)); nfsd_file_put(nf); out: @@ -1123,7 +1174,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) - return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); /* Callers expect file metadata to be committed here */ return nfserrno(commit_metadata(resfhp)); } @@ -1386,7 +1437,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, && d_inode(dchild)->i_atime.tv_sec == v_atime && d_inode(dchild)->i_size == 0 ) { if (created) - *created = 1; + *created = true; break; } /* fall through */ @@ -1395,7 +1446,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, && d_inode(dchild)->i_atime.tv_sec == v_atime && d_inode(dchild)->i_size == 0 ) { if (created) - *created = 1; + *created = true; goto set_attr; } /* fall through */ @@ -1412,7 +1463,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } if (created) - *created = 1; + *created = true; nfsd_check_ignore_resizing(iap); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index cc110a10bfe8..3eb660ad80d1 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -34,6 +34,8 @@ #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) +struct nfsd_file; + /* * Callback function for readdir */ @@ -48,15 +50,16 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct iattr *, int, time_t); + struct iattr *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 __be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); -__be32 nfsd4_clone_file_range(struct file *, u64, struct file *, - u64, u64, bool); +__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, + u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, @@ -71,7 +74,7 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, struct svc_fh *res, int createmode, u32 *verifier, bool *truncp, bool *created); __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, - loff_t, unsigned long); + loff_t, unsigned long, __be32 *verf); #endif /* CONFIG_NFSD_V3 */ int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, @@ -91,11 +94,12 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *, u32 *eof); __be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, - struct kvec *, int, unsigned long *, int); + struct kvec *, int, unsigned long *, + int stable, __be32 *verf); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct file *file, loff_t offset, + struct nfsd_file *nf, loff_t offset, struct kvec *vec, int vlen, unsigned long *cnt, - int stable); + int stable, __be32 *verf); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 99ff9f403ff1..4155fd71714c 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -14,7 +14,7 @@ struct nfsd3_sattrargs { struct svc_fh fh; struct iattr attrs; int check_guard; - time_t guardtime; + time64_t guardtime; }; struct nfsd3_diropargs { @@ -159,6 +159,7 @@ struct nfsd3_writeres { struct svc_fh fh; unsigned long count; int committed; + __be32 verf[2]; }; struct nfsd3_renameres { @@ -223,6 +224,7 @@ struct nfsd3_pathconfres { struct nfsd3_commitres { __be32 status; struct svc_fh fh; + __be32 verf[2]; }; struct nfsd3_getaclres { diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f4737d66ee98..db63d39b1507 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -46,9 +46,9 @@ #define CURRENT_STATE_ID_FLAG (1<<0) #define SAVED_STATE_ID_FLAG (1<<1) -#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f)) -#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f)) -#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f)) +#define SET_CSTATE_FLAG(c, f) ((c)->sid_flags |= (f)) +#define HAS_CSTATE_FLAG(c, f) ((c)->sid_flags & (f)) +#define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f)) struct nfsd4_compound_state { struct svc_fh current_fh; @@ -221,6 +221,7 @@ struct nfsd4_lookup { struct nfsd4_putfh { u32 pf_fhlen; /* request */ char *pf_fhval; /* request */ + bool no_verify; /* represents foreigh fh */ }; struct nfsd4_open { @@ -518,11 +519,13 @@ struct nfsd42_write_res { struct nfsd4_copy { /* request */ - stateid_t cp_src_stateid; - stateid_t cp_dst_stateid; - u64 cp_src_pos; - u64 cp_dst_pos; - u64 cp_count; + stateid_t cp_src_stateid; + stateid_t cp_dst_stateid; + u64 cp_src_pos; + u64 cp_dst_pos; + u64 cp_count; + struct nl4_server cp_src; + bool cp_intra; /* both */ bool cp_synchronous; @@ -540,13 +543,18 @@ struct nfsd4_copy { struct nfsd_file *nf_src; struct nfsd_file *nf_dst; - stateid_t cp_stateid; + copy_stateid_t cp_stateid; struct list_head copies; struct task_struct *copy_task; refcount_t refcount; bool stopped; + + struct vfsmount *ss_mnt; + struct nfs_fh c_fh; + nfs4_stateid stateid; }; +extern bool inter_copy_offload_enable; struct nfsd4_seek { /* request */ @@ -568,6 +576,18 @@ struct nfsd4_offload_status { u32 status; }; +struct nfsd4_copy_notify { + /* request */ + stateid_t cpn_src_stateid; + struct nl4_server cpn_dst; + + /* response */ + stateid_t cpn_cnr_stateid; + u64 cpn_sec; + u32 cpn_nsec; + struct nl4_server cpn_src; +}; + struct nfsd4_op { int opnum; const struct nfsd4_operation * opdesc; @@ -627,6 +647,7 @@ struct nfsd4_op { struct nfsd4_clone clone; struct nfsd4_copy copy; struct nfsd4_offload_status offload_status; + struct nfsd4_copy_notify copy_notify; struct nfsd4_seek seek; } u; struct nfs4_replay * replay; diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 25543a966c48..29eaa4544372 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -273,6 +273,7 @@ static void *help_start(struct seq_file *m, loff_t *pos) static void *help_next(struct seq_file *m, void *v, loff_t *pos) { + (*pos)++; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n"); return NULL; diff --git a/fs/pipe.c b/fs/pipe.c index 57502c3c0fba..5a34d6c22d4c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -108,16 +108,19 @@ void pipe_double_lock(struct pipe_inode_info *pipe1, /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe) { - DEFINE_WAIT(wait); + DEFINE_WAIT(rdwait); + DEFINE_WAIT(wrwait); /* * Pipes are system-local resources, so sleeping on them * is considered a noninteractive wait: */ - prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE); + prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE); pipe_unlock(pipe); schedule(); - finish_wait(&pipe->wait, &wait); + finish_wait(&pipe->rd_wait, &rdwait); + finish_wait(&pipe->wr_wait, &wrwait); pipe_lock(pipe); } @@ -286,7 +289,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) size_t total_len = iov_iter_count(to); struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; - bool was_full; + bool was_full, wake_next_reader = false; ssize_t ret; /* Null read succeeds. */ @@ -344,10 +347,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (!buf->len) { pipe_buf_release(pipe, buf); - spin_lock_irq(&pipe->wait.lock); + spin_lock_irq(&pipe->rd_wait.lock); tail++; pipe->tail = tail; - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); } total_len -= chars; if (!total_len) @@ -384,7 +387,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * no data. */ if (unlikely(was_full)) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } @@ -394,18 +397,23 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * since we've done any required wakeups and there's no need * to mark anything accessed. And we've dropped the lock. */ - if (wait_event_interruptible(pipe->wait, pipe_readable(pipe)) < 0) + if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) return -ERESTARTSYS; __pipe_lock(pipe); was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); + wake_next_reader = true; } + if (pipe_empty(pipe->head, pipe->tail)) + wake_next_reader = false; __pipe_unlock(pipe); if (was_full) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } + if (wake_next_reader) + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); if (ret > 0) file_accessed(filp); return ret; @@ -437,6 +445,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) size_t total_len = iov_iter_count(from); ssize_t chars; bool was_empty = false; + bool wake_next_writer = false; /* Null write succeeds. */ if (unlikely(total_len == 0)) @@ -515,16 +524,16 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * it, either the reader will consume it or it'll still * be there for the next write. */ - spin_lock_irq(&pipe->wait.lock); + spin_lock_irq(&pipe->rd_wait.lock); head = pipe->head; if (pipe_full(head, pipe->tail, pipe->max_usage)) { - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); continue; } pipe->head = head + 1; - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); /* Insert it into the buffer array */ buf = &pipe->bufs[head & mask]; @@ -576,14 +585,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) */ __pipe_unlock(pipe); if (was_empty) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - wait_event_interruptible(pipe->wait, pipe_writable(pipe)); + wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); __pipe_lock(pipe); was_empty = pipe_empty(pipe->head, pipe->tail); + wake_next_writer = true; } out: + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + wake_next_writer = false; __pipe_unlock(pipe); /* @@ -596,9 +608,11 @@ out: * wake up pending jobs */ if (was_empty) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } + if (wake_next_writer) + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { int err = file_update_time(filp); if (err) @@ -642,12 +656,15 @@ pipe_poll(struct file *filp, poll_table *wait) unsigned int head, tail; /* - * Reading only -- no need for acquiring the semaphore. + * Reading pipe state only -- no need for acquiring the semaphore. * * But because this is racy, the code has to add the * entry to the poll table _first_ .. */ - poll_wait(filp, &pipe->wait, wait); + if (filp->f_mode & FMODE_READ) + poll_wait(filp, &pipe->rd_wait, wait); + if (filp->f_mode & FMODE_WRITE) + poll_wait(filp, &pipe->wr_wait, wait); /* * .. and only then can you do the racy tests. That way, @@ -706,7 +723,8 @@ pipe_release(struct inode *inode, struct file *file) pipe->writers--; if (pipe->readers || pipe->writers) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM | EPOLLERR | EPOLLHUP); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLHUP); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM | EPOLLERR | EPOLLHUP); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } @@ -789,7 +807,8 @@ struct pipe_inode_info *alloc_pipe_info(void) GFP_KERNEL_ACCOUNT); if (pipe->bufs) { - init_waitqueue_head(&pipe->wait); + init_waitqueue_head(&pipe->rd_wait); + init_waitqueue_head(&pipe->wr_wait); pipe->r_counter = pipe->w_counter = 1; pipe->max_usage = pipe_bufs; pipe->ring_size = pipe_bufs; @@ -1007,7 +1026,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) static void wake_up_partner(struct pipe_inode_info *pipe) { - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->rd_wait); + wake_up_interruptible(&pipe->wr_wait); } static int fifo_open(struct inode *inode, struct file *filp) @@ -1118,13 +1138,13 @@ static int fifo_open(struct inode *inode, struct file *filp) err_rd: if (!--pipe->readers) - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->wr_wait); ret = -ERESTARTSYS; goto err; err_wr: if (!--pipe->writers) - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->rd_wait); ret = -ERESTARTSYS; goto err; @@ -1251,7 +1271,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) pipe->max_usage = nr_slots; pipe->tail = tail; pipe->head = head; - wake_up_interruptible_all(&pipe->wait); + wake_up_interruptible_all(&pipe->rd_wait); + wake_up_interruptible_all(&pipe->wr_wait); return pipe->max_usage * PAGE_SIZE; out_revert_acct: diff --git a/fs/proc/root.c b/fs/proc/root.c index 72c07a34cff0..608233dfd29c 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -41,24 +41,19 @@ enum proc_param { Opt_hidepid, }; -static const struct fs_parameter_spec proc_param_specs[] = { +static const struct fs_parameter_spec proc_fs_parameters[] = { fsparam_u32("gid", Opt_gid), fsparam_u32("hidepid", Opt_hidepid), {} }; -static const struct fs_parameter_description proc_fs_parameters = { - .name = "proc", - .specs = proc_param_specs, -}; - static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct proc_fs_context *ctx = fc->fs_private; struct fs_parse_result result; int opt; - opt = fs_parse(fc, &proc_fs_parameters, param, &result); + opt = fs_parse(fc, proc_fs_parameters, param, &result); if (opt < 0) return opt; @@ -71,7 +66,7 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->hidepid = result.uint_32; if (ctx->hidepid < HIDEPID_OFF || ctx->hidepid > HIDEPID_INVISIBLE) - return invalf(fc, "proc: hidepid value must be between 0 and 2.\n"); + return invalfc(fc, "hidepid value must be between 0 and 2.\n"); break; default: @@ -207,7 +202,7 @@ static void proc_kill_sb(struct super_block *sb) static struct file_system_type proc_fs_type = { .name = "proc", .init_fs_context = proc_init_fs_context, - .parameters = &proc_fs_parameters, + .parameters = proc_fs_parameters, .kill_sb = proc_kill_sb, .fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM, }; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index d82636e8eb65..ee179a81b3da 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -181,23 +181,18 @@ enum ramfs_param { Opt_mode, }; -static const struct fs_parameter_spec ramfs_param_specs[] = { +const struct fs_parameter_spec ramfs_fs_parameters[] = { fsparam_u32oct("mode", Opt_mode), {} }; -const struct fs_parameter_description ramfs_fs_parameters = { - .name = "ramfs", - .specs = ramfs_param_specs, -}; - static int ramfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct ramfs_fs_info *fsi = fc->s_fs_info; int opt; - opt = fs_parse(fc, &ramfs_fs_parameters, param, &result); + opt = fs_parse(fc, ramfs_fs_parameters, param, &result); if (opt < 0) { /* * We might like to report bad mount options here; @@ -278,7 +273,7 @@ static void ramfs_kill_sb(struct super_block *sb) static struct file_system_type ramfs_fs_type = { .name = "ramfs", .init_fs_context = ramfs_init_fs_context, - .parameters = &ramfs_fs_parameters, + .parameters = ramfs_fs_parameters, .kill_sb = ramfs_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; diff --git a/fs/splice.c b/fs/splice.c index 3009652a41c8..d671936d0aad 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -165,8 +165,8 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = { static void wakeup_pipe_readers(struct pipe_inode_info *pipe) { smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); + if (waitqueue_active(&pipe->rd_wait)) + wake_up_interruptible(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } @@ -462,8 +462,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); + if (waitqueue_active(&pipe->wr_wait)) + wake_up_interruptible(&pipe->wr_wait); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile index d46e9baee285..b88aecc86550 100644 --- a/fs/unicode/Makefile +++ b/fs/unicode/Makefile @@ -35,4 +35,4 @@ $(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE endif targets += utf8data.h -hostprogs-y += mkutf8data +hostprogs += mkutf8data diff --git a/fs/vboxsf/Kconfig b/fs/vboxsf/Kconfig new file mode 100644 index 000000000000..b84586ae08b3 --- /dev/null +++ b/fs/vboxsf/Kconfig @@ -0,0 +1,10 @@ +config VBOXSF_FS + tristate "VirtualBox guest shared folder (vboxsf) support" + depends on X86 && VBOXGUEST + select NLS + help + VirtualBox hosts can share folders with guests, this driver + implements the Linux-guest side of this allowing folders exported + by the host to be mounted under Linux. + + If you want to use shared folders in VirtualBox guests, answer Y or M. diff --git a/fs/vboxsf/Makefile b/fs/vboxsf/Makefile new file mode 100644 index 000000000000..9e4328e79623 --- /dev/null +++ b/fs/vboxsf/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MIT + +obj-$(CONFIG_VBOXSF_FS) += vboxsf.o + +vboxsf-y := dir.o file.o utils.o vboxsf_wrappers.o super.o diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c new file mode 100644 index 000000000000..dd147b490982 --- /dev/null +++ b/fs/vboxsf/dir.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Directory inode and file operations + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/namei.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +static int vboxsf_dir_open(struct inode *inode, struct file *file) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_createparms params = {}; + struct vboxsf_dir_info *sf_d; + int err; + + sf_d = vboxsf_dir_info_alloc(); + if (!sf_d) + return -ENOMEM; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_DIRECTORY | SHFL_CF_ACT_OPEN_IF_EXISTS | + SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ; + + err = vboxsf_create_at_dentry(file_dentry(file), ¶ms); + if (err) + goto err_free_dir_info; + + if (params.result != SHFL_FILE_EXISTS) { + err = -ENOENT; + goto err_close; + } + + err = vboxsf_dir_read_all(sbi, sf_d, params.handle); + if (err) + goto err_close; + + vboxsf_close(sbi->root, params.handle); + file->private_data = sf_d; + return 0; + +err_close: + vboxsf_close(sbi->root, params.handle); +err_free_dir_info: + vboxsf_dir_info_free(sf_d); + return err; +} + +static int vboxsf_dir_release(struct inode *inode, struct file *file) +{ + if (file->private_data) + vboxsf_dir_info_free(file->private_data); + + return 0; +} + +static unsigned int vboxsf_get_d_type(u32 mode) +{ + unsigned int d_type; + + switch (mode & SHFL_TYPE_MASK) { + case SHFL_TYPE_FIFO: + d_type = DT_FIFO; + break; + case SHFL_TYPE_DEV_CHAR: + d_type = DT_CHR; + break; + case SHFL_TYPE_DIRECTORY: + d_type = DT_DIR; + break; + case SHFL_TYPE_DEV_BLOCK: + d_type = DT_BLK; + break; + case SHFL_TYPE_FILE: + d_type = DT_REG; + break; + case SHFL_TYPE_SYMLINK: + d_type = DT_LNK; + break; + case SHFL_TYPE_SOCKET: + d_type = DT_SOCK; + break; + case SHFL_TYPE_WHITEOUT: + d_type = DT_WHT; + break; + default: + d_type = DT_UNKNOWN; + break; + } + return d_type; +} + +static bool vboxsf_dir_emit(struct file *dir, struct dir_context *ctx) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(file_inode(dir)->i_sb); + struct vboxsf_dir_info *sf_d = dir->private_data; + struct shfl_dirinfo *info; + struct vboxsf_dir_buf *b; + unsigned int d_type; + loff_t i, cur = 0; + ino_t fake_ino; + void *end; + int err; + + list_for_each_entry(b, &sf_d->info_list, head) { +try_next_entry: + if (ctx->pos >= cur + b->entries) { + cur += b->entries; + continue; + } + + /* + * Note the vboxsf_dir_info objects we are iterating over here + * are variable sized, so the info pointer may end up being + * unaligned. This is how we get the data from the host. + * Since vboxsf is only supported on x86 machines this is not + * a problem. + */ + for (i = 0, info = b->buf; i < ctx->pos - cur; i++) { + end = &info->name.string.utf8[info->name.size]; + /* Only happens if the host gives us corrupt data */ + if (WARN_ON(end > (b->buf + b->used))) + return false; + info = end; + } + + end = &info->name.string.utf8[info->name.size]; + if (WARN_ON(end > (b->buf + b->used))) + return false; + + /* Info now points to the right entry, emit it. */ + d_type = vboxsf_get_d_type(info->info.attr.mode); + + /* + * On 32 bit systems pos is 64 signed, while ino is 32 bit + * unsigned so fake_ino may overflow, check for this. + */ + if ((ino_t)(ctx->pos + 1) != (u64)(ctx->pos + 1)) { + vbg_err("vboxsf: fake ino overflow, truncating dir\n"); + return false; + } + fake_ino = ctx->pos + 1; + + if (sbi->nls) { + char d_name[NAME_MAX]; + + err = vboxsf_nlscpy(sbi, d_name, NAME_MAX, + info->name.string.utf8, + info->name.length); + if (err) { + /* skip erroneous entry and proceed */ + ctx->pos += 1; + goto try_next_entry; + } + + return dir_emit(ctx, d_name, strlen(d_name), + fake_ino, d_type); + } + + return dir_emit(ctx, info->name.string.utf8, info->name.length, + fake_ino, d_type); + } + + return false; +} + +static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx) +{ + bool emitted; + + do { + emitted = vboxsf_dir_emit(dir, ctx); + if (emitted) + ctx->pos += 1; + } while (emitted); + + return 0; +} + +const struct file_operations vboxsf_dir_fops = { + .open = vboxsf_dir_open, + .iterate = vboxsf_dir_iterate, + .release = vboxsf_dir_release, + .read = generic_read_dir, + .llseek = generic_file_llseek, +}; + +/* + * This is called during name resolution/lookup to check if the @dentry in + * the cache is still valid. the job is handled by vboxsf_inode_revalidate. + */ +static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + if (flags & LOOKUP_RCU) + return -ECHILD; + + if (d_really_is_positive(dentry)) + return vboxsf_inode_revalidate(dentry) == 0; + else + return vboxsf_stat_dentry(dentry, NULL) == -ENOENT; +} + +const struct dentry_operations vboxsf_dentry_ops = { + .d_revalidate = vboxsf_dentry_revalidate +}; + +/* iops */ + +static struct dentry *vboxsf_dir_lookup(struct inode *parent, + struct dentry *dentry, + unsigned int flags) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct shfl_fsobjinfo fsinfo; + struct inode *inode; + int err; + + dentry->d_time = jiffies; + + err = vboxsf_stat_dentry(dentry, &fsinfo); + if (err) { + inode = (err == -ENOENT) ? NULL : ERR_PTR(err); + } else { + inode = vboxsf_new_inode(parent->i_sb); + if (!IS_ERR(inode)) + vboxsf_init_inode(sbi, inode, &fsinfo); + } + + return d_splice_alias(inode, dentry); +} + +static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry, + struct shfl_fsobjinfo *info) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_inode *sf_i; + struct inode *inode; + + inode = vboxsf_new_inode(parent->i_sb); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + sf_i = VBOXSF_I(inode); + /* The host may have given us different attr then requested */ + sf_i->force_restat = 1; + vboxsf_init_inode(sbi, inode, info); + + d_instantiate(dentry, inode); + + return 0; +} + +static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry, + umode_t mode, int is_dir) +{ + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct shfl_createparms params = {}; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW | + SHFL_CF_ACT_FAIL_IF_EXISTS | + SHFL_CF_ACCESS_READWRITE | + (is_dir ? SHFL_CF_DIRECTORY : 0); + params.info.attr.mode = (mode & 0777) | + (is_dir ? SHFL_TYPE_DIRECTORY : SHFL_TYPE_FILE); + params.info.attr.additional = SHFLFSOBJATTRADD_NOTHING; + + err = vboxsf_create_at_dentry(dentry, ¶ms); + if (err) + return err; + + if (params.result != SHFL_FILE_CREATED) + return -EPERM; + + vboxsf_close(sbi->root, params.handle); + + err = vboxsf_dir_instantiate(parent, dentry, ¶ms.info); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + + return 0; +} + +static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry, + umode_t mode, bool excl) +{ + return vboxsf_dir_create(parent, dentry, mode, 0); +} + +static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry, + umode_t mode) +{ + return vboxsf_dir_create(parent, dentry, mode, 1); +} + +static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct inode *inode = d_inode(dentry); + struct shfl_string *path; + u32 flags; + int err; + + if (S_ISDIR(inode->i_mode)) + flags = SHFL_REMOVE_DIR; + else + flags = SHFL_REMOVE_FILE; + + if (S_ISLNK(inode->i_mode)) + flags |= SHFL_REMOVE_SYMLINK; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_remove(sbi->root, path, flags); + __putname(path); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + + return 0; +} + +static int vboxsf_dir_rename(struct inode *old_parent, + struct dentry *old_dentry, + struct inode *new_parent, + struct dentry *new_dentry, + unsigned int flags) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(old_parent->i_sb); + struct vboxsf_inode *sf_old_parent_i = VBOXSF_I(old_parent); + struct vboxsf_inode *sf_new_parent_i = VBOXSF_I(new_parent); + u32 shfl_flags = SHFL_RENAME_FILE | SHFL_RENAME_REPLACE_IF_EXISTS; + struct shfl_string *old_path, *new_path; + int err; + + if (flags) + return -EINVAL; + + old_path = vboxsf_path_from_dentry(sbi, old_dentry); + if (IS_ERR(old_path)) + return PTR_ERR(old_path); + + new_path = vboxsf_path_from_dentry(sbi, new_dentry); + if (IS_ERR(new_path)) { + err = PTR_ERR(new_path); + goto err_put_old_path; + } + + if (d_inode(old_dentry)->i_mode & S_IFDIR) + shfl_flags = 0; + + err = vboxsf_rename(sbi->root, old_path, new_path, shfl_flags); + if (err == 0) { + /* parent directories access/change time changed */ + sf_new_parent_i->force_restat = 1; + sf_old_parent_i->force_restat = 1; + } + + __putname(new_path); +err_put_old_path: + __putname(old_path); + return err; +} + +static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry, + const char *symname) +{ + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + int symname_size = strlen(symname) + 1; + struct shfl_string *path, *ssymname; + struct shfl_fsobjinfo info; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + ssymname = kmalloc(SHFLSTRING_HEADER_SIZE + symname_size, GFP_KERNEL); + if (!ssymname) { + __putname(path); + return -ENOMEM; + } + ssymname->length = symname_size - 1; + ssymname->size = symname_size; + memcpy(ssymname->string.utf8, symname, symname_size); + + err = vboxsf_symlink(sbi->root, path, ssymname, &info); + kfree(ssymname); + __putname(path); + if (err) { + /* -EROFS means symlinks are note support -> -EPERM */ + return (err == -EROFS) ? -EPERM : err; + } + + err = vboxsf_dir_instantiate(parent, dentry, &info); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + return 0; +} + +const struct inode_operations vboxsf_dir_iops = { + .lookup = vboxsf_dir_lookup, + .create = vboxsf_dir_mkfile, + .mkdir = vboxsf_dir_mkdir, + .rmdir = vboxsf_dir_unlink, + .unlink = vboxsf_dir_unlink, + .rename = vboxsf_dir_rename, + .symlink = vboxsf_dir_symlink, + .getattr = vboxsf_getattr, + .setattr = vboxsf_setattr, +}; diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c new file mode 100644 index 000000000000..c4ab5996d97a --- /dev/null +++ b/fs/vboxsf/file.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Regular file inode and file ops. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/mm.h> +#include <linux/page-flags.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/sizes.h> +#include "vfsmod.h" + +struct vboxsf_handle { + u64 handle; + u32 root; + u32 access_flags; + struct kref refcount; + struct list_head head; +}; + +static int vboxsf_file_open(struct inode *inode, struct file *file) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct shfl_createparms params = {}; + struct vboxsf_handle *sf_handle; + u32 access_flags = 0; + int err; + + sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL); + if (!sf_handle) + return -ENOMEM; + + /* + * We check the value of params.handle afterwards to find out if + * the call succeeded or failed, as the API does not seem to cleanly + * distinguish error and informational messages. + * + * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to + * make the shared folders host service use our mode parameter. + */ + params.handle = SHFL_HANDLE_NIL; + if (file->f_flags & O_CREAT) { + params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW; + /* + * We ignore O_EXCL, as the Linux kernel seems to call create + * beforehand itself, so O_EXCL should always fail. + */ + if (file->f_flags & O_TRUNC) + params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS; + else + params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS; + } else { + params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW; + if (file->f_flags & O_TRUNC) + params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS; + } + + switch (file->f_flags & O_ACCMODE) { + case O_RDONLY: + access_flags |= SHFL_CF_ACCESS_READ; + break; + + case O_WRONLY: + access_flags |= SHFL_CF_ACCESS_WRITE; + break; + + case O_RDWR: + access_flags |= SHFL_CF_ACCESS_READWRITE; + break; + + default: + WARN_ON(1); + } + + if (file->f_flags & O_APPEND) + access_flags |= SHFL_CF_ACCESS_APPEND; + + params.create_flags |= access_flags; + params.info.attr.mode = inode->i_mode; + + err = vboxsf_create_at_dentry(file_dentry(file), ¶ms); + if (err == 0 && params.handle == SHFL_HANDLE_NIL) + err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT; + if (err) { + kfree(sf_handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + + /* init our handle struct and add it to the inode's handles list */ + sf_handle->handle = params.handle; + sf_handle->root = VBOXSF_SBI(inode->i_sb)->root; + sf_handle->access_flags = access_flags; + kref_init(&sf_handle->refcount); + + mutex_lock(&sf_i->handle_list_mutex); + list_add(&sf_handle->head, &sf_i->handle_list); + mutex_unlock(&sf_i->handle_list_mutex); + + file->private_data = sf_handle; + return 0; +} + +static void vboxsf_handle_release(struct kref *refcount) +{ + struct vboxsf_handle *sf_handle = + container_of(refcount, struct vboxsf_handle, refcount); + + vboxsf_close(sf_handle->root, sf_handle->handle); + kfree(sf_handle); +} + +static int vboxsf_file_release(struct inode *inode, struct file *file) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct vboxsf_handle *sf_handle = file->private_data; + + /* + * When a file is closed on our (the guest) side, we want any subsequent + * accesses done on the host side to see all changes done from our side. + */ + filemap_write_and_wait(inode->i_mapping); + + mutex_lock(&sf_i->handle_list_mutex); + list_del(&sf_handle->head); + mutex_unlock(&sf_i->handle_list_mutex); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); + return 0; +} + +/* + * Write back dirty pages now, because there may not be any suitable + * open files later + */ +static void vboxsf_vma_close(struct vm_area_struct *vma) +{ + filemap_write_and_wait(vma->vm_file->f_mapping); +} + +static const struct vm_operations_struct vboxsf_file_vm_ops = { + .close = vboxsf_vma_close, + .fault = filemap_fault, + .map_pages = filemap_map_pages, +}; + +static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err; + + err = generic_file_mmap(file, vma); + if (!err) + vma->vm_ops = &vboxsf_file_vm_ops; + + return err; +} + +/* + * Note that since we are accessing files on the host's filesystem, files + * may always be changed underneath us by the host! + * + * The vboxsf API between the guest and the host does not offer any functions + * to deal with this. There is no inode-generation to check for changes, no + * events / callback on changes and no way to lock files. + * + * To avoid returning stale data when a file gets *opened* on our (the guest) + * side, we do a "stat" on the host side, then compare the mtime with the + * last known mtime and invalidate the page-cache if they differ. + * This is done from vboxsf_inode_revalidate(). + * + * When reads are done through the read_iter fop, it is possible to do + * further cache revalidation then, there are 3 options to deal with this: + * + * 1) Rely solely on the revalidation done at open time + * 2) Do another "stat" and compare mtime again. Unfortunately the vboxsf + * host API does not allow stat on handles, so we would need to use + * file->f_path.dentry and the stat will then fail if the file was unlinked + * or renamed (and there is no thing like NFS' silly-rename). So we get: + * 2a) "stat" and compare mtime, on stat failure invalidate the cache + * 2b) "stat" and compare mtime, on stat failure do nothing + * 3) Simply always call invalidate_inode_pages2_range on the range of the read + * + * Currently we are keeping things KISS and using option 1. this allows + * directly using generic_file_read_iter without wrapping it. + * + * This means that only data written on the host side before open() on + * the guest side is guaranteed to be seen by the guest. If necessary + * we may provide other read-cache strategies in the future and make this + * configurable through a mount option. + */ +const struct file_operations vboxsf_reg_fops = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .mmap = vboxsf_file_mmap, + .open = vboxsf_file_open, + .release = vboxsf_file_release, + .fsync = noop_fsync, + .splice_read = generic_file_splice_read, +}; + +const struct inode_operations vboxsf_reg_iops = { + .getattr = vboxsf_getattr, + .setattr = vboxsf_setattr +}; + +static int vboxsf_readpage(struct file *file, struct page *page) +{ + struct vboxsf_handle *sf_handle = file->private_data; + loff_t off = page_offset(page); + u32 nread = PAGE_SIZE; + u8 *buf; + int err; + + buf = kmap(page); + + err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf); + if (err == 0) { + memset(&buf[nread], 0, PAGE_SIZE - nread); + flush_dcache_page(page); + SetPageUptodate(page); + } else { + SetPageError(page); + } + + kunmap(page); + unlock_page(page); + return err; +} + +static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i) +{ + struct vboxsf_handle *h, *sf_handle = NULL; + + mutex_lock(&sf_i->handle_list_mutex); + list_for_each_entry(h, &sf_i->handle_list, head) { + if (h->access_flags == SHFL_CF_ACCESS_WRITE || + h->access_flags == SHFL_CF_ACCESS_READWRITE) { + kref_get(&h->refcount); + sf_handle = h; + break; + } + } + mutex_unlock(&sf_i->handle_list_mutex); + + return sf_handle; +} + +static int vboxsf_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct vboxsf_handle *sf_handle; + loff_t off = page_offset(page); + loff_t size = i_size_read(inode); + u32 nwrite = PAGE_SIZE; + u8 *buf; + int err; + + if (off + PAGE_SIZE > size) + nwrite = size & ~PAGE_MASK; + + sf_handle = vboxsf_get_write_handle(sf_i); + if (!sf_handle) + return -EBADF; + + buf = kmap(page); + err = vboxsf_write(sf_handle->root, sf_handle->handle, + off, &nwrite, buf); + kunmap(page); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); + + if (err == 0) { + ClearPageError(page); + /* mtime changed */ + sf_i->force_restat = 1; + } else { + ClearPageUptodate(page); + } + + unlock_page(page); + return err; +} + +static int vboxsf_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + struct vboxsf_handle *sf_handle = file->private_data; + unsigned int from = pos & ~PAGE_MASK; + u32 nwritten = len; + u8 *buf; + int err; + + /* zero the stale part of the page if we did a short copy */ + if (!PageUptodate(page) && copied < len) + zero_user(page, from + copied, len - copied); + + buf = kmap(page); + err = vboxsf_write(sf_handle->root, sf_handle->handle, + pos, &nwritten, buf + from); + kunmap(page); + + if (err) { + nwritten = 0; + goto out; + } + + /* mtime changed */ + VBOXSF_I(inode)->force_restat = 1; + + if (!PageUptodate(page) && nwritten == PAGE_SIZE) + SetPageUptodate(page); + + pos += nwritten; + if (pos > inode->i_size) + i_size_write(inode, pos); + +out: + unlock_page(page); + put_page(page); + + return nwritten; +} + +/* + * Note simple_write_begin does not read the page from disk on partial writes + * this is ok since vboxsf_write_end only writes the written parts of the + * page and it does not call SetPageUptodate for partial writes. + */ +const struct address_space_operations vboxsf_reg_aops = { + .readpage = vboxsf_readpage, + .writepage = vboxsf_writepage, + .set_page_dirty = __set_page_dirty_nobuffers, + .write_begin = simple_write_begin, + .write_end = vboxsf_write_end, +}; + +static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_string *path; + char *link; + int err; + + if (!dentry) + return ERR_PTR(-ECHILD); + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return ERR_CAST(path); + + link = kzalloc(PATH_MAX, GFP_KERNEL); + if (!link) { + __putname(path); + return ERR_PTR(-ENOMEM); + } + + err = vboxsf_readlink(sbi->root, path, PATH_MAX, link); + __putname(path); + if (err) { + kfree(link); + return ERR_PTR(err); + } + + set_delayed_call(done, kfree_link, link); + return link; +} + +const struct inode_operations vboxsf_lnk_iops = { + .get_link = vboxsf_get_link +}; diff --git a/fs/vboxsf/shfl_hostintf.h b/fs/vboxsf/shfl_hostintf.h new file mode 100644 index 000000000000..aca829062c12 --- /dev/null +++ b/fs/vboxsf/shfl_hostintf.h @@ -0,0 +1,901 @@ +/* SPDX-License-Identifier: MIT */ +/* + * VirtualBox Shared Folders: host interface definition. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#ifndef SHFL_HOSTINTF_H +#define SHFL_HOSTINTF_H + +#include <linux/vbox_vmmdev_types.h> + +/* The max in/out buffer size for a FN_READ or FN_WRITE call */ +#define SHFL_MAX_RW_COUNT (16 * SZ_1M) + +/* + * Structures shared between guest and the service + * can be relocated and use offsets to point to variable + * length parts. + * + * Shared folders protocol works with handles. + * Before doing any action on a file system object, + * one have to obtain the object handle via a SHFL_FN_CREATE + * request. A handle must be closed with SHFL_FN_CLOSE. + */ + +enum { + SHFL_FN_QUERY_MAPPINGS = 1, /* Query mappings changes. */ + SHFL_FN_QUERY_MAP_NAME = 2, /* Query map name. */ + SHFL_FN_CREATE = 3, /* Open/create object. */ + SHFL_FN_CLOSE = 4, /* Close object handle. */ + SHFL_FN_READ = 5, /* Read object content. */ + SHFL_FN_WRITE = 6, /* Write new object content. */ + SHFL_FN_LOCK = 7, /* Lock/unlock a range in the object. */ + SHFL_FN_LIST = 8, /* List object content. */ + SHFL_FN_INFORMATION = 9, /* Query/set object information. */ + /* Note function number 10 is not used! */ + SHFL_FN_REMOVE = 11, /* Remove object */ + SHFL_FN_MAP_FOLDER_OLD = 12, /* Map folder (legacy) */ + SHFL_FN_UNMAP_FOLDER = 13, /* Unmap folder */ + SHFL_FN_RENAME = 14, /* Rename object */ + SHFL_FN_FLUSH = 15, /* Flush file */ + SHFL_FN_SET_UTF8 = 16, /* Select UTF8 filename encoding */ + SHFL_FN_MAP_FOLDER = 17, /* Map folder */ + SHFL_FN_READLINK = 18, /* Read symlink dest (as of VBox 4.0) */ + SHFL_FN_SYMLINK = 19, /* Create symlink (as of VBox 4.0) */ + SHFL_FN_SET_SYMLINKS = 20, /* Ask host to show symlinks (4.0+) */ +}; + +/* Root handles for a mapping are of type u32, Root handles are unique. */ +#define SHFL_ROOT_NIL UINT_MAX + +/* Shared folders handle for an opened object are of type u64. */ +#define SHFL_HANDLE_NIL ULLONG_MAX + +/* Hardcoded maximum length (in chars) of a shared folder name. */ +#define SHFL_MAX_LEN (256) +/* Hardcoded maximum number of shared folder mapping available to the guest. */ +#define SHFL_MAX_MAPPINGS (64) + +/** Shared folder string buffer structure. */ +struct shfl_string { + /** Allocated size of the string member in bytes. */ + u16 size; + + /** Length of string without trailing nul in bytes. */ + u16 length; + + /** UTF-8 or UTF-16 string. Nul terminated. */ + union { + u8 utf8[2]; + u16 utf16[1]; + u16 ucs2[1]; /* misnomer, use utf16. */ + } string; +}; +VMMDEV_ASSERT_SIZE(shfl_string, 6); + +/* The size of shfl_string w/o the string part. */ +#define SHFLSTRING_HEADER_SIZE 4 + +/* Calculate size of the string. */ +static inline u32 shfl_string_buf_size(const struct shfl_string *string) +{ + return string ? SHFLSTRING_HEADER_SIZE + string->size : 0; +} + +/* Set user id on execution (S_ISUID). */ +#define SHFL_UNIX_ISUID 0004000U +/* Set group id on execution (S_ISGID). */ +#define SHFL_UNIX_ISGID 0002000U +/* Sticky bit (S_ISVTX / S_ISTXT). */ +#define SHFL_UNIX_ISTXT 0001000U + +/* Owner readable (S_IRUSR). */ +#define SHFL_UNIX_IRUSR 0000400U +/* Owner writable (S_IWUSR). */ +#define SHFL_UNIX_IWUSR 0000200U +/* Owner executable (S_IXUSR). */ +#define SHFL_UNIX_IXUSR 0000100U + +/* Group readable (S_IRGRP). */ +#define SHFL_UNIX_IRGRP 0000040U +/* Group writable (S_IWGRP). */ +#define SHFL_UNIX_IWGRP 0000020U +/* Group executable (S_IXGRP). */ +#define SHFL_UNIX_IXGRP 0000010U + +/* Other readable (S_IROTH). */ +#define SHFL_UNIX_IROTH 0000004U +/* Other writable (S_IWOTH). */ +#define SHFL_UNIX_IWOTH 0000002U +/* Other executable (S_IXOTH). */ +#define SHFL_UNIX_IXOTH 0000001U + +/* Named pipe (fifo) (S_IFIFO). */ +#define SHFL_TYPE_FIFO 0010000U +/* Character device (S_IFCHR). */ +#define SHFL_TYPE_DEV_CHAR 0020000U +/* Directory (S_IFDIR). */ +#define SHFL_TYPE_DIRECTORY 0040000U +/* Block device (S_IFBLK). */ +#define SHFL_TYPE_DEV_BLOCK 0060000U +/* Regular file (S_IFREG). */ +#define SHFL_TYPE_FILE 0100000U +/* Symbolic link (S_IFLNK). */ +#define SHFL_TYPE_SYMLINK 0120000U +/* Socket (S_IFSOCK). */ +#define SHFL_TYPE_SOCKET 0140000U +/* Whiteout (S_IFWHT). */ +#define SHFL_TYPE_WHITEOUT 0160000U +/* Type mask (S_IFMT). */ +#define SHFL_TYPE_MASK 0170000U + +/* Checks the mode flags indicate a directory (S_ISDIR). */ +#define SHFL_IS_DIRECTORY(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_DIRECTORY) +/* Checks the mode flags indicate a symbolic link (S_ISLNK). */ +#define SHFL_IS_SYMLINK(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_SYMLINK) + +/** The available additional information in a shfl_fsobjattr object. */ +enum shfl_fsobjattr_add { + /** No additional information is available / requested. */ + SHFLFSOBJATTRADD_NOTHING = 1, + /** + * The additional unix attributes (shfl_fsobjattr::u::unix_attr) are + * available / requested. + */ + SHFLFSOBJATTRADD_UNIX, + /** + * The additional extended attribute size (shfl_fsobjattr::u::size) is + * available / requested. + */ + SHFLFSOBJATTRADD_EASIZE, + /** + * The last valid item (inclusive). + * The valid range is SHFLFSOBJATTRADD_NOTHING thru + * SHFLFSOBJATTRADD_LAST. + */ + SHFLFSOBJATTRADD_LAST = SHFLFSOBJATTRADD_EASIZE, + + /** The usual 32-bit hack. */ + SHFLFSOBJATTRADD_32BIT_SIZE_HACK = 0x7fffffff +}; + +/** + * Additional unix Attributes, these are available when + * shfl_fsobjattr.additional == SHFLFSOBJATTRADD_UNIX. + */ +struct shfl_fsobjattr_unix { + /** + * The user owning the filesystem object (st_uid). + * This field is ~0U if not supported. + */ + u32 uid; + + /** + * The group the filesystem object is assigned (st_gid). + * This field is ~0U if not supported. + */ + u32 gid; + + /** + * Number of hard links to this filesystem object (st_nlink). + * This field is 1 if the filesystem doesn't support hardlinking or + * the information isn't available. + */ + u32 hardlinks; + + /** + * The device number of the device which this filesystem object resides + * on (st_dev). This field is 0 if this information is not available. + */ + u32 inode_id_device; + + /** + * The unique identifier (within the filesystem) of this filesystem + * object (st_ino). Together with inode_id_device, this field can be + * used as a OS wide unique id, when both their values are not 0. + * This field is 0 if the information is not available. + */ + u64 inode_id; + + /** + * User flags (st_flags). + * This field is 0 if this information is not available. + */ + u32 flags; + + /** + * The current generation number (st_gen). + * This field is 0 if this information is not available. + */ + u32 generation_id; + + /** + * The device number of a char. or block device type object (st_rdev). + * This field is 0 if the file isn't a char. or block device or when + * the OS doesn't use the major+minor device idenfication scheme. + */ + u32 device; +} __packed; + +/** Extended attribute size. */ +struct shfl_fsobjattr_easize { + /** Size of EAs. */ + s64 cb; +} __packed; + +/** Shared folder filesystem object attributes. */ +struct shfl_fsobjattr { + /** Mode flags (st_mode). SHFL_UNIX_*, SHFL_TYPE_*, and SHFL_DOS_*. */ + u32 mode; + + /** The additional attributes available. */ + enum shfl_fsobjattr_add additional; + + /** + * Additional attributes. + * + * Unless explicitly specified to an API, the API can provide additional + * data as it is provided by the underlying OS. + */ + union { + struct shfl_fsobjattr_unix unix_attr; + struct shfl_fsobjattr_easize size; + } __packed u; +} __packed; +VMMDEV_ASSERT_SIZE(shfl_fsobjattr, 44); + +struct shfl_timespec { + s64 ns_relative_to_unix_epoch; +}; + +/** Filesystem object information structure. */ +struct shfl_fsobjinfo { + /** + * Logical size (st_size). + * For normal files this is the size of the file. + * For symbolic links, this is the length of the path name contained + * in the symbolic link. + * For other objects this fields needs to be specified. + */ + s64 size; + + /** Disk allocation size (st_blocks * DEV_BSIZE). */ + s64 allocated; + + /** Time of last access (st_atime). */ + struct shfl_timespec access_time; + + /** Time of last data modification (st_mtime). */ + struct shfl_timespec modification_time; + + /** + * Time of last status change (st_ctime). + * If not available this is set to modification_time. + */ + struct shfl_timespec change_time; + + /** + * Time of file birth (st_birthtime). + * If not available this is set to change_time. + */ + struct shfl_timespec birth_time; + + /** Attributes. */ + struct shfl_fsobjattr attr; + +} __packed; +VMMDEV_ASSERT_SIZE(shfl_fsobjinfo, 92); + +/** + * result of an open/create request. + * Along with handle value the result code + * identifies what has happened while + * trying to open the object. + */ +enum shfl_create_result { + SHFL_NO_RESULT, + /** Specified path does not exist. */ + SHFL_PATH_NOT_FOUND, + /** Path to file exists, but the last component does not. */ + SHFL_FILE_NOT_FOUND, + /** File already exists and either has been opened or not. */ + SHFL_FILE_EXISTS, + /** New file was created. */ + SHFL_FILE_CREATED, + /** Existing file was replaced or overwritten. */ + SHFL_FILE_REPLACED +}; + +/* No flags. Initialization value. */ +#define SHFL_CF_NONE (0x00000000) + +/* + * Only lookup the object, do not return a handle. When this is set all other + * flags are ignored. + */ +#define SHFL_CF_LOOKUP (0x00000001) + +/* + * Open parent directory of specified object. + * Useful for the corresponding Windows FSD flag + * and for opening paths like \\dir\\*.* to search the 'dir'. + */ +#define SHFL_CF_OPEN_TARGET_DIRECTORY (0x00000002) + +/* Create/open a directory. */ +#define SHFL_CF_DIRECTORY (0x00000004) + +/* + * Open/create action to do if object exists + * and if the object does not exists. + * REPLACE file means atomically DELETE and CREATE. + * OVERWRITE file means truncating the file to 0 and + * setting new size. + * When opening an existing directory REPLACE and OVERWRITE + * actions are considered invalid, and cause returning + * FILE_EXISTS with NIL handle. + */ +#define SHFL_CF_ACT_MASK_IF_EXISTS (0x000000f0) +#define SHFL_CF_ACT_MASK_IF_NEW (0x00000f00) + +/* What to do if object exists. */ +#define SHFL_CF_ACT_OPEN_IF_EXISTS (0x00000000) +#define SHFL_CF_ACT_FAIL_IF_EXISTS (0x00000010) +#define SHFL_CF_ACT_REPLACE_IF_EXISTS (0x00000020) +#define SHFL_CF_ACT_OVERWRITE_IF_EXISTS (0x00000030) + +/* What to do if object does not exist. */ +#define SHFL_CF_ACT_CREATE_IF_NEW (0x00000000) +#define SHFL_CF_ACT_FAIL_IF_NEW (0x00000100) + +/* Read/write requested access for the object. */ +#define SHFL_CF_ACCESS_MASK_RW (0x00003000) + +/* No access requested. */ +#define SHFL_CF_ACCESS_NONE (0x00000000) +/* Read access requested. */ +#define SHFL_CF_ACCESS_READ (0x00001000) +/* Write access requested. */ +#define SHFL_CF_ACCESS_WRITE (0x00002000) +/* Read/Write access requested. */ +#define SHFL_CF_ACCESS_READWRITE (0x00003000) + +/* Requested share access for the object. */ +#define SHFL_CF_ACCESS_MASK_DENY (0x0000c000) + +/* Allow any access. */ +#define SHFL_CF_ACCESS_DENYNONE (0x00000000) +/* Do not allow read. */ +#define SHFL_CF_ACCESS_DENYREAD (0x00004000) +/* Do not allow write. */ +#define SHFL_CF_ACCESS_DENYWRITE (0x00008000) +/* Do not allow access. */ +#define SHFL_CF_ACCESS_DENYALL (0x0000c000) + +/* Requested access to attributes of the object. */ +#define SHFL_CF_ACCESS_MASK_ATTR (0x00030000) + +/* No access requested. */ +#define SHFL_CF_ACCESS_ATTR_NONE (0x00000000) +/* Read access requested. */ +#define SHFL_CF_ACCESS_ATTR_READ (0x00010000) +/* Write access requested. */ +#define SHFL_CF_ACCESS_ATTR_WRITE (0x00020000) +/* Read/Write access requested. */ +#define SHFL_CF_ACCESS_ATTR_READWRITE (0x00030000) + +/* + * The file is opened in append mode. + * Ignored if SHFL_CF_ACCESS_WRITE is not set. + */ +#define SHFL_CF_ACCESS_APPEND (0x00040000) + +/** Create parameters buffer struct for SHFL_FN_CREATE call */ +struct shfl_createparms { + /** Returned handle of opened object. */ + u64 handle; + + /** Returned result of the operation */ + enum shfl_create_result result; + + /** SHFL_CF_* */ + u32 create_flags; + + /** + * Attributes of object to create and + * returned actual attributes of opened/created object. + */ + struct shfl_fsobjinfo info; +} __packed; + +/** Shared Folder directory information */ +struct shfl_dirinfo { + /** Full information about the object. */ + struct shfl_fsobjinfo info; + /** + * The length of the short field (number of UTF16 chars). + * It is 16-bit for reasons of alignment. + */ + u16 short_name_len; + /** + * The short name for 8.3 compatibility. + * Empty string if not available. + */ + u16 short_name[14]; + struct shfl_string name; +}; + +/** Shared folder filesystem properties. */ +struct shfl_fsproperties { + /** + * The maximum size of a filesystem object name. + * This does not include the '\\0'. + */ + u32 max_component_len; + + /** + * True if the filesystem is remote. + * False if the filesystem is local. + */ + bool remote; + + /** + * True if the filesystem is case sensitive. + * False if the filesystem is case insensitive. + */ + bool case_sensitive; + + /** + * True if the filesystem is mounted read only. + * False if the filesystem is mounted read write. + */ + bool read_only; + + /** + * True if the filesystem can encode unicode object names. + * False if it can't. + */ + bool supports_unicode; + + /** + * True if the filesystem is compresses. + * False if it isn't or we don't know. + */ + bool compressed; + + /** + * True if the filesystem compresses of individual files. + * False if it doesn't or we don't know. + */ + bool file_compression; +}; +VMMDEV_ASSERT_SIZE(shfl_fsproperties, 12); + +struct shfl_volinfo { + s64 total_allocation_bytes; + s64 available_allocation_bytes; + u32 bytes_per_allocation_unit; + u32 bytes_per_sector; + u32 serial; + struct shfl_fsproperties properties; +}; + + +/** SHFL_FN_MAP_FOLDER Parameters structure. */ +struct shfl_map_folder { + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: UTF16 + * Path delimiter + */ + struct vmmdev_hgcm_function_parameter delimiter; + + /** + * pointer, in: SHFLROOT (u32) + * Case senstive flag + */ + struct vmmdev_hgcm_function_parameter case_sensitive; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_MAP_FOLDER (4) + + +/** SHFL_FN_UNMAP_FOLDER Parameters structure. */ +struct shfl_unmap_folder { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_UNMAP_FOLDER (1) + + +/** SHFL_FN_CREATE Parameters structure. */ +struct shfl_create { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, in/out: + * Points to struct shfl_createparms buffer. + */ + struct vmmdev_hgcm_function_parameter parms; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_CREATE (3) + + +/** SHFL_FN_CLOSE Parameters structure. */ +struct shfl_close { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to close. + */ + struct vmmdev_hgcm_function_parameter handle; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_CLOSE (2) + + +/** SHFL_FN_READ Parameters structure. */ +struct shfl_read { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to read from. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value64, in: + * Offset to read from. + */ + struct vmmdev_hgcm_function_parameter offset; + + /** + * value64, in/out: + * Bytes to read/How many were read. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, out: + * Buffer to place data to. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_READ (5) + + +/** SHFL_FN_WRITE Parameters structure. */ +struct shfl_write { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to write to. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value64, in: + * Offset to write to. + */ + struct vmmdev_hgcm_function_parameter offset; + + /** + * value64, in/out: + * Bytes to write/How many were written. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in: + * Data to write. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_WRITE (5) + + +/* + * SHFL_FN_LIST + * Listing information includes variable length RTDIRENTRY[EX] structures. + */ + +#define SHFL_LIST_NONE 0 +#define SHFL_LIST_RETURN_ONE 1 + +/** SHFL_FN_LIST Parameters structure. */ +struct shfl_list { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to be listed. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value32, in: + * List flags SHFL_LIST_*. + */ + struct vmmdev_hgcm_function_parameter flags; + + /** + * value32, in/out: + * Bytes to be used for listing information/How many bytes were used. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in/optional + * Points to struct shfl_string buffer that specifies a search path. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: + * Buffer to place listing information to. (struct shfl_dirinfo) + */ + struct vmmdev_hgcm_function_parameter buffer; + + /** + * value32, in/out: + * Indicates a key where the listing must be resumed. + * in: 0 means start from begin of object. + * out: 0 means listing completed. + */ + struct vmmdev_hgcm_function_parameter resume_point; + + /** + * pointer, out: + * Number of files returned + */ + struct vmmdev_hgcm_function_parameter file_count; +}; + +/* Number of parameters */ +#define SHFL_CPARMS_LIST (8) + + +/** SHFL_FN_READLINK Parameters structure. */ +struct shfl_readLink { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: + * Buffer to place data to. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_READLINK (3) + + +/* SHFL_FN_INFORMATION */ + +/* Mask of Set/Get bit. */ +#define SHFL_INFO_MODE_MASK (0x1) +/* Get information */ +#define SHFL_INFO_GET (0x0) +/* Set information */ +#define SHFL_INFO_SET (0x1) + +/* Get name of the object. */ +#define SHFL_INFO_NAME (0x2) +/* Set size of object (extend/trucate); only applies to file objects */ +#define SHFL_INFO_SIZE (0x4) +/* Get/Set file object info. */ +#define SHFL_INFO_FILE (0x8) +/* Get volume information. */ +#define SHFL_INFO_VOLUME (0x10) + +/** SHFL_FN_INFORMATION Parameters structure. */ +struct shfl_information { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to be listed. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value32, in: + * SHFL_INFO_* + */ + struct vmmdev_hgcm_function_parameter flags; + + /** + * value32, in/out: + * Bytes to be used for information/How many bytes were used. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in/out: + * Information to be set/get (shfl_fsobjinfo or shfl_string). Do not + * forget to set the shfl_fsobjinfo::attr::additional for a get + * operation as well. + */ + struct vmmdev_hgcm_function_parameter info; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_INFORMATION (5) + + +/* SHFL_FN_REMOVE */ + +#define SHFL_REMOVE_FILE (0x1) +#define SHFL_REMOVE_DIR (0x2) +#define SHFL_REMOVE_SYMLINK (0x4) + +/** SHFL_FN_REMOVE Parameters structure. */ +struct shfl_remove { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * value32, in: + * remove flags (file/directory) + */ + struct vmmdev_hgcm_function_parameter flags; + +}; + +#define SHFL_CPARMS_REMOVE (3) + + +/* SHFL_FN_RENAME */ + +#define SHFL_RENAME_FILE (0x1) +#define SHFL_RENAME_DIR (0x2) +#define SHFL_RENAME_REPLACE_IF_EXISTS (0x4) + +/** SHFL_FN_RENAME Parameters structure. */ +struct shfl_rename { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string src. + */ + struct vmmdev_hgcm_function_parameter src; + + /** + * pointer, in: + * Points to struct shfl_string dest. + */ + struct vmmdev_hgcm_function_parameter dest; + + /** + * value32, in: + * rename flags (file/directory) + */ + struct vmmdev_hgcm_function_parameter flags; + +}; + +#define SHFL_CPARMS_RENAME (4) + + +/** SHFL_FN_SYMLINK Parameters structure. */ +struct shfl_symlink { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string of path for the new symlink. + */ + struct vmmdev_hgcm_function_parameter new_path; + + /** + * pointer, in: + * Points to struct shfl_string of destination for symlink. + */ + struct vmmdev_hgcm_function_parameter old_path; + + /** + * pointer, out: + * Information about created symlink. + */ + struct vmmdev_hgcm_function_parameter info; + +}; + +#define SHFL_CPARMS_SYMLINK (4) + +#endif diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c new file mode 100644 index 000000000000..675e26989376 --- /dev/null +++ b/fs/vboxsf/super.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Virtual File System. + * + * Module initialization/finalization + * File system registration/deregistration + * Superblock reading + * Few utility functions + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/idr.h> +#include <linux/fs_parser.h> +#include <linux/magic.h> +#include <linux/module.h> +#include <linux/nls.h> +#include <linux/statfs.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ + +#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000') +#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377') +#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376') +#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375') + +static int follow_symlinks; +module_param(follow_symlinks, int, 0444); +MODULE_PARM_DESC(follow_symlinks, + "Let host resolve symlinks rather than showing them"); + +static DEFINE_IDA(vboxsf_bdi_ida); +static DEFINE_MUTEX(vboxsf_setup_mutex); +static bool vboxsf_setup_done; +static struct super_operations vboxsf_super_ops; /* forward declaration */ +static struct kmem_cache *vboxsf_inode_cachep; + +static char * const vboxsf_default_nls = CONFIG_NLS_DEFAULT; + +enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode, + opt_dmask, opt_fmask }; + +static const struct fs_parameter_spec vboxsf_fs_parameters[] = { + fsparam_string ("nls", opt_nls), + fsparam_u32 ("uid", opt_uid), + fsparam_u32 ("gid", opt_gid), + fsparam_u32 ("ttl", opt_ttl), + fsparam_u32oct ("dmode", opt_dmode), + fsparam_u32oct ("fmode", opt_fmode), + fsparam_u32oct ("dmask", opt_dmask), + fsparam_u32oct ("fmask", opt_fmask), + {} +}; + +static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + kuid_t uid; + kgid_t gid; + int opt; + + opt = fs_parse(fc, vboxsf_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case opt_nls: + if (ctx->nls_name || fc->purpose != FS_CONTEXT_FOR_MOUNT) { + vbg_err("vboxsf: Cannot reconfigure nls option\n"); + return -EINVAL; + } + ctx->nls_name = param->string; + param->string = NULL; + break; + case opt_uid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + return -EINVAL; + ctx->o.uid = uid; + break; + case opt_gid: + gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(gid)) + return -EINVAL; + ctx->o.gid = gid; + break; + case opt_ttl: + ctx->o.ttl = msecs_to_jiffies(result.uint_32); + break; + case opt_dmode: + if (result.uint_32 & ~0777) + return -EINVAL; + ctx->o.dmode = result.uint_32; + ctx->o.dmode_set = true; + break; + case opt_fmode: + if (result.uint_32 & ~0777) + return -EINVAL; + ctx->o.fmode = result.uint_32; + ctx->o.fmode_set = true; + break; + case opt_dmask: + if (result.uint_32 & ~07777) + return -EINVAL; + ctx->o.dmask = result.uint_32; + break; + case opt_fmask: + if (result.uint_32 & ~07777) + return -EINVAL; + ctx->o.fmask = result.uint_32; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + struct shfl_string *folder_name, root_path; + struct vboxsf_sbi *sbi; + struct dentry *droot; + struct inode *iroot; + char *nls_name; + size_t size; + int err; + + if (!fc->source) + return -EINVAL; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + sbi->o = ctx->o; + idr_init(&sbi->ino_idr); + spin_lock_init(&sbi->ino_idr_lock); + sbi->next_generation = 1; + sbi->bdi_id = -1; + + /* Load nls if not utf8 */ + nls_name = ctx->nls_name ? ctx->nls_name : vboxsf_default_nls; + if (strcmp(nls_name, "utf8") != 0) { + if (nls_name == vboxsf_default_nls) + sbi->nls = load_nls_default(); + else + sbi->nls = load_nls(nls_name); + + if (!sbi->nls) { + vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); + err = -EINVAL; + goto fail_free; + } + } + + sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL); + if (sbi->bdi_id < 0) { + err = sbi->bdi_id; + goto fail_free; + } + + err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + if (err) + goto fail_free; + + /* Turn source into a shfl_string and map the folder */ + size = strlen(fc->source) + 1; + folder_name = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL); + if (!folder_name) { + err = -ENOMEM; + goto fail_free; + } + folder_name->size = size; + folder_name->length = size - 1; + strlcpy(folder_name->string.utf8, fc->source, size); + err = vboxsf_map_folder(folder_name, &sbi->root); + kfree(folder_name); + if (err) { + vbg_err("vboxsf: Host rejected mount of '%s' with error %d\n", + fc->source, err); + goto fail_free; + } + + root_path.length = 1; + root_path.size = 2; + root_path.string.utf8[0] = '/'; + root_path.string.utf8[1] = 0; + err = vboxsf_stat(sbi, &root_path, &sbi->root_info); + if (err) + goto fail_unmap; + + sb->s_magic = VBOXSF_SUPER_MAGIC; + sb->s_blocksize = 1024; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_op = &vboxsf_super_ops; + sb->s_d_op = &vboxsf_dentry_ops; + + iroot = iget_locked(sb, 0); + if (!iroot) { + err = -ENOMEM; + goto fail_unmap; + } + vboxsf_init_inode(sbi, iroot, &sbi->root_info); + unlock_new_inode(iroot); + + droot = d_make_root(iroot); + if (!droot) { + err = -ENOMEM; + goto fail_unmap; + } + + sb->s_root = droot; + sb->s_fs_info = sbi; + return 0; + +fail_unmap: + vboxsf_unmap_folder(sbi->root); +fail_free: + if (sbi->bdi_id >= 0) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); + idr_destroy(&sbi->ino_idr); + kfree(sbi); + return err; +} + +static void vboxsf_inode_init_once(void *data) +{ + struct vboxsf_inode *sf_i = data; + + mutex_init(&sf_i->handle_list_mutex); + inode_init_once(&sf_i->vfs_inode); +} + +static struct inode *vboxsf_alloc_inode(struct super_block *sb) +{ + struct vboxsf_inode *sf_i; + + sf_i = kmem_cache_alloc(vboxsf_inode_cachep, GFP_NOFS); + if (!sf_i) + return NULL; + + sf_i->force_restat = 0; + INIT_LIST_HEAD(&sf_i->handle_list); + + return &sf_i->vfs_inode; +} + +static void vboxsf_free_inode(struct inode *inode) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + unsigned long flags; + + spin_lock_irqsave(&sbi->ino_idr_lock, flags); + idr_remove(&sbi->ino_idr, inode->i_ino); + spin_unlock_irqrestore(&sbi->ino_idr_lock, flags); + kmem_cache_free(vboxsf_inode_cachep, VBOXSF_I(inode)); +} + +static void vboxsf_put_super(struct super_block *sb) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(sb); + + vboxsf_unmap_folder(sbi->root); + if (sbi->bdi_id >= 0) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); + + /* + * vboxsf_free_inode uses the idr, make sure all delayed rcu free + * inodes are flushed. + */ + rcu_barrier(); + idr_destroy(&sbi->ino_idr); + kfree(sbi); +} + +static int vboxsf_statfs(struct dentry *dentry, struct kstatfs *stat) +{ + struct super_block *sb = dentry->d_sb; + struct shfl_volinfo shfl_volinfo; + struct vboxsf_sbi *sbi; + u32 buf_len; + int err; + + sbi = VBOXSF_SBI(sb); + buf_len = sizeof(shfl_volinfo); + err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME, + &buf_len, &shfl_volinfo); + if (err) + return err; + + stat->f_type = VBOXSF_SUPER_MAGIC; + stat->f_bsize = shfl_volinfo.bytes_per_allocation_unit; + + do_div(shfl_volinfo.total_allocation_bytes, + shfl_volinfo.bytes_per_allocation_unit); + stat->f_blocks = shfl_volinfo.total_allocation_bytes; + + do_div(shfl_volinfo.available_allocation_bytes, + shfl_volinfo.bytes_per_allocation_unit); + stat->f_bfree = shfl_volinfo.available_allocation_bytes; + stat->f_bavail = shfl_volinfo.available_allocation_bytes; + + stat->f_files = 1000; + /* + * Don't return 0 here since the guest may then think that it is not + * possible to create any more files. + */ + stat->f_ffree = 1000000; + stat->f_fsid.val[0] = 0; + stat->f_fsid.val[1] = 0; + stat->f_namelen = 255; + return 0; +} + +static struct super_operations vboxsf_super_ops = { + .alloc_inode = vboxsf_alloc_inode, + .free_inode = vboxsf_free_inode, + .put_super = vboxsf_put_super, + .statfs = vboxsf_statfs, +}; + +static int vboxsf_setup(void) +{ + int err; + + mutex_lock(&vboxsf_setup_mutex); + + if (vboxsf_setup_done) + goto success; + + vboxsf_inode_cachep = + kmem_cache_create("vboxsf_inode_cache", + sizeof(struct vboxsf_inode), 0, + (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | + SLAB_ACCOUNT), + vboxsf_inode_init_once); + if (!vboxsf_inode_cachep) { + err = -ENOMEM; + goto fail_nomem; + } + + err = vboxsf_connect(); + if (err) { + vbg_err("vboxsf: err %d connecting to guest PCI-device\n", err); + vbg_err("vboxsf: make sure you are inside a VirtualBox VM\n"); + vbg_err("vboxsf: and check dmesg for vboxguest errors\n"); + goto fail_free_cache; + } + + err = vboxsf_set_utf8(); + if (err) { + vbg_err("vboxsf_setutf8 error %d\n", err); + goto fail_disconnect; + } + + if (!follow_symlinks) { + err = vboxsf_set_symlinks(); + if (err) + vbg_warn("vboxsf: Unable to show symlinks: %d\n", err); + } + + vboxsf_setup_done = true; +success: + mutex_unlock(&vboxsf_setup_mutex); + return 0; + +fail_disconnect: + vboxsf_disconnect(); +fail_free_cache: + kmem_cache_destroy(vboxsf_inode_cachep); +fail_nomem: + mutex_unlock(&vboxsf_setup_mutex); + return err; +} + +static int vboxsf_parse_monolithic(struct fs_context *fc, void *data) +{ + char *options = data; + + if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 && + options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 && + options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 && + options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) { + vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n"); + return -EINVAL; + } + + return generic_parse_monolithic(fc, data); +} + +static int vboxsf_get_tree(struct fs_context *fc) +{ + int err; + + err = vboxsf_setup(); + if (err) + return err; + + return get_tree_nodev(fc, vboxsf_fill_super); +} + +static int vboxsf_reconfigure(struct fs_context *fc) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(fc->root->d_sb); + struct vboxsf_fs_context *ctx = fc->fs_private; + struct inode *iroot = fc->root->d_sb->s_root->d_inode; + + /* Apply changed options to the root inode */ + sbi->o = ctx->o; + vboxsf_init_inode(sbi, iroot, &sbi->root_info); + + return 0; +} + +static void vboxsf_free_fc(struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + + kfree(ctx->nls_name); + kfree(ctx); +} + +static const struct fs_context_operations vboxsf_context_ops = { + .free = vboxsf_free_fc, + .parse_param = vboxsf_parse_param, + .parse_monolithic = vboxsf_parse_monolithic, + .get_tree = vboxsf_get_tree, + .reconfigure = vboxsf_reconfigure, +}; + +static int vboxsf_init_fs_context(struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + current_uid_gid(&ctx->o.uid, &ctx->o.gid); + + fc->fs_private = ctx; + fc->ops = &vboxsf_context_ops; + return 0; +} + +static struct file_system_type vboxsf_fs_type = { + .owner = THIS_MODULE, + .name = "vboxsf", + .init_fs_context = vboxsf_init_fs_context, + .kill_sb = kill_anon_super +}; + +/* Module initialization/finalization handlers */ +static int __init vboxsf_init(void) +{ + return register_filesystem(&vboxsf_fs_type); +} + +static void __exit vboxsf_fini(void) +{ + unregister_filesystem(&vboxsf_fs_type); + + mutex_lock(&vboxsf_setup_mutex); + if (vboxsf_setup_done) { + vboxsf_disconnect(); + /* + * Make sure all delayed rcu free inodes are flushed + * before we destroy the cache. + */ + rcu_barrier(); + kmem_cache_destroy(vboxsf_inode_cachep); + } + mutex_unlock(&vboxsf_setup_mutex); +} + +module_init(vboxsf_init); +module_exit(vboxsf_fini); + +MODULE_DESCRIPTION("Oracle VM VirtualBox Module for Host File System Access"); +MODULE_AUTHOR("Oracle Corporation"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_FS("vboxsf"); diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c new file mode 100644 index 000000000000..96bd160da48b --- /dev/null +++ b/fs/vboxsf/utils.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Utility functions. + * Mainly conversion from/to VirtualBox/Linux data structures. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/namei.h> +#include <linux/nls.h> +#include <linux/sizes.h> +#include <linux/vfs.h> +#include "vfsmod.h" + +struct inode *vboxsf_new_inode(struct super_block *sb) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(sb); + struct inode *inode; + unsigned long flags; + int cursor, ret; + u32 gen; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&sbi->ino_idr_lock, flags); + cursor = idr_get_cursor(&sbi->ino_idr); + ret = idr_alloc_cyclic(&sbi->ino_idr, inode, 1, 0, GFP_ATOMIC); + if (ret >= 0 && ret < cursor) + sbi->next_generation++; + gen = sbi->next_generation; + spin_unlock_irqrestore(&sbi->ino_idr_lock, flags); + idr_preload_end(); + + if (ret < 0) { + iput(inode); + return ERR_PTR(ret); + } + + inode->i_ino = ret; + inode->i_generation = gen; + return inode; +} + +/* set [inode] attributes based on [info], uid/gid based on [sbi] */ +void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, + const struct shfl_fsobjinfo *info) +{ + const struct shfl_fsobjattr *attr; + s64 allocated; + int mode; + + attr = &info->attr; + +#define mode_set(r) ((attr->mode & (SHFL_UNIX_##r)) ? (S_##r) : 0) + + mode = mode_set(IRUSR); + mode |= mode_set(IWUSR); + mode |= mode_set(IXUSR); + + mode |= mode_set(IRGRP); + mode |= mode_set(IWGRP); + mode |= mode_set(IXGRP); + + mode |= mode_set(IROTH); + mode |= mode_set(IWOTH); + mode |= mode_set(IXOTH); + +#undef mode_set + + /* We use the host-side values for these */ + inode->i_flags |= S_NOATIME | S_NOCMTIME; + inode->i_mapping->a_ops = &vboxsf_reg_aops; + + if (SHFL_IS_DIRECTORY(attr->mode)) { + inode->i_mode = sbi->o.dmode_set ? sbi->o.dmode : mode; + inode->i_mode &= ~sbi->o.dmask; + inode->i_mode |= S_IFDIR; + inode->i_op = &vboxsf_dir_iops; + inode->i_fop = &vboxsf_dir_fops; + /* + * XXX: this probably should be set to the number of entries + * in the directory plus two (. ..) + */ + set_nlink(inode, 1); + } else if (SHFL_IS_SYMLINK(attr->mode)) { + inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode; + inode->i_mode &= ~sbi->o.fmask; + inode->i_mode |= S_IFLNK; + inode->i_op = &vboxsf_lnk_iops; + set_nlink(inode, 1); + } else { + inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode; + inode->i_mode &= ~sbi->o.fmask; + inode->i_mode |= S_IFREG; + inode->i_op = &vboxsf_reg_iops; + inode->i_fop = &vboxsf_reg_fops; + set_nlink(inode, 1); + } + + inode->i_uid = sbi->o.uid; + inode->i_gid = sbi->o.gid; + + inode->i_size = info->size; + inode->i_blkbits = 12; + /* i_blocks always in units of 512 bytes! */ + allocated = info->allocated + 511; + do_div(allocated, 512); + inode->i_blocks = allocated; + + inode->i_atime = ns_to_timespec64( + info->access_time.ns_relative_to_unix_epoch); + inode->i_ctime = ns_to_timespec64( + info->change_time.ns_relative_to_unix_epoch); + inode->i_mtime = ns_to_timespec64( + info->modification_time.ns_relative_to_unix_epoch); +} + +int vboxsf_create_at_dentry(struct dentry *dentry, + struct shfl_createparms *params) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_string *path; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_create(sbi->root, path, params); + __putname(path); + + return err; +} + +int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, + struct shfl_fsobjinfo *info) +{ + struct shfl_createparms params = {}; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW; + + err = vboxsf_create(sbi->root, path, ¶ms); + if (err) + return err; + + if (params.result != SHFL_FILE_EXISTS) + return -ENOENT; + + if (info) + *info = params.info; + + return 0; +} + +int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_string *path; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_stat(sbi, path, info); + __putname(path); + return err; +} + +int vboxsf_inode_revalidate(struct dentry *dentry) +{ + struct vboxsf_sbi *sbi; + struct vboxsf_inode *sf_i; + struct shfl_fsobjinfo info; + struct timespec64 prev_mtime; + struct inode *inode; + int err; + + if (!dentry || !d_really_is_positive(dentry)) + return -EINVAL; + + inode = d_inode(dentry); + prev_mtime = inode->i_mtime; + sf_i = VBOXSF_I(inode); + sbi = VBOXSF_SBI(dentry->d_sb); + if (!sf_i->force_restat) { + if (time_before(jiffies, dentry->d_time + sbi->o.ttl)) + return 0; + } + + err = vboxsf_stat_dentry(dentry, &info); + if (err) + return err; + + dentry->d_time = jiffies; + sf_i->force_restat = 0; + vboxsf_init_inode(sbi, inode, &info); + + /* + * If the file was changed on the host side we need to invalidate the + * page-cache for it. Note this also gets triggered by our own writes, + * this is unavoidable. + */ + if (timespec64_compare(&inode->i_mtime, &prev_mtime) > 0) + invalidate_inode_pages2(inode->i_mapping); + + return 0; +} + +int vboxsf_getattr(const struct path *path, struct kstat *kstat, + u32 request_mask, unsigned int flags) +{ + int err; + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + + switch (flags & AT_STATX_SYNC_TYPE) { + case AT_STATX_DONT_SYNC: + err = 0; + break; + case AT_STATX_FORCE_SYNC: + sf_i->force_restat = 1; + /* fall-through */ + default: + err = vboxsf_inode_revalidate(dentry); + } + if (err) + return err; + + generic_fillattr(d_inode(dentry), kstat); + return 0; +} + +int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry)); + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_createparms params = {}; + struct shfl_fsobjinfo info = {}; + u32 buf_len; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_ACT_OPEN_IF_EXISTS | + SHFL_CF_ACT_FAIL_IF_NEW | + SHFL_CF_ACCESS_ATTR_WRITE; + + /* this is at least required for Posix hosts */ + if (iattr->ia_valid & ATTR_SIZE) + params.create_flags |= SHFL_CF_ACCESS_WRITE; + + err = vboxsf_create_at_dentry(dentry, ¶ms); + if (err || params.result != SHFL_FILE_EXISTS) + return err ? err : -ENOENT; + +#define mode_set(r) ((iattr->ia_mode & (S_##r)) ? SHFL_UNIX_##r : 0) + + /* + * Setting the file size and setting the other attributes has to + * be handled separately. + */ + if (iattr->ia_valid & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME)) { + if (iattr->ia_valid & ATTR_MODE) { + info.attr.mode = mode_set(IRUSR); + info.attr.mode |= mode_set(IWUSR); + info.attr.mode |= mode_set(IXUSR); + info.attr.mode |= mode_set(IRGRP); + info.attr.mode |= mode_set(IWGRP); + info.attr.mode |= mode_set(IXGRP); + info.attr.mode |= mode_set(IROTH); + info.attr.mode |= mode_set(IWOTH); + info.attr.mode |= mode_set(IXOTH); + + if (iattr->ia_mode & S_IFDIR) + info.attr.mode |= SHFL_TYPE_DIRECTORY; + else + info.attr.mode |= SHFL_TYPE_FILE; + } + + if (iattr->ia_valid & ATTR_ATIME) + info.access_time.ns_relative_to_unix_epoch = + timespec64_to_ns(&iattr->ia_atime); + + if (iattr->ia_valid & ATTR_MTIME) + info.modification_time.ns_relative_to_unix_epoch = + timespec64_to_ns(&iattr->ia_mtime); + + /* + * Ignore ctime (inode change time) as it can't be set + * from userland anyway. + */ + + buf_len = sizeof(info); + err = vboxsf_fsinfo(sbi->root, params.handle, + SHFL_INFO_SET | SHFL_INFO_FILE, &buf_len, + &info); + if (err) { + vboxsf_close(sbi->root, params.handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + } + +#undef mode_set + + if (iattr->ia_valid & ATTR_SIZE) { + memset(&info, 0, sizeof(info)); + info.size = iattr->ia_size; + buf_len = sizeof(info); + err = vboxsf_fsinfo(sbi->root, params.handle, + SHFL_INFO_SET | SHFL_INFO_SIZE, &buf_len, + &info); + if (err) { + vboxsf_close(sbi->root, params.handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + } + + vboxsf_close(sbi->root, params.handle); + + /* Update the inode with what the host has actually given us. */ + if (sf_i->force_restat) + vboxsf_inode_revalidate(dentry); + + return 0; +} + +/* + * [dentry] contains string encoded in coding system that corresponds + * to [sbi]->nls, we must convert it to UTF8 here. + * Returns a shfl_string allocated through __getname (must be freed using + * __putname), or an ERR_PTR on error. + */ +struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, + struct dentry *dentry) +{ + struct shfl_string *shfl_path; + int path_len, out_len, nb; + char *buf, *path; + wchar_t uni; + u8 *out; + + buf = __getname(); + if (!buf) + return ERR_PTR(-ENOMEM); + + path = dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR(path)) { + __putname(buf); + return ERR_CAST(path); + } + path_len = strlen(path); + + if (sbi->nls) { + shfl_path = __getname(); + if (!shfl_path) { + __putname(buf); + return ERR_PTR(-ENOMEM); + } + + out = shfl_path->string.utf8; + out_len = PATH_MAX - SHFLSTRING_HEADER_SIZE - 1; + + while (path_len) { + nb = sbi->nls->char2uni(path, path_len, &uni); + if (nb < 0) { + __putname(shfl_path); + __putname(buf); + return ERR_PTR(-EINVAL); + } + path += nb; + path_len -= nb; + + nb = utf32_to_utf8(uni, out, out_len); + if (nb < 0) { + __putname(shfl_path); + __putname(buf); + return ERR_PTR(-ENAMETOOLONG); + } + out += nb; + out_len -= nb; + } + *out = 0; + shfl_path->length = out - shfl_path->string.utf8; + shfl_path->size = shfl_path->length + 1; + __putname(buf); + } else { + if ((SHFLSTRING_HEADER_SIZE + path_len + 1) > PATH_MAX) { + __putname(buf); + return ERR_PTR(-ENAMETOOLONG); + } + /* + * dentry_path stores the name at the end of buf, but the + * shfl_string string we return must be properly aligned. + */ + shfl_path = (struct shfl_string *)buf; + memmove(shfl_path->string.utf8, path, path_len); + shfl_path->string.utf8[path_len] = 0; + shfl_path->length = path_len; + shfl_path->size = path_len + 1; + } + + return shfl_path; +} + +int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, + const unsigned char *utf8_name, size_t utf8_len) +{ + const char *in; + char *out; + size_t out_len; + size_t out_bound_len; + size_t in_bound_len; + + in = utf8_name; + in_bound_len = utf8_len; + + out = name; + out_len = 0; + /* Reserve space for terminating 0 */ + out_bound_len = name_bound_len - 1; + + while (in_bound_len) { + int nb; + unicode_t uni; + + nb = utf8_to_utf32(in, in_bound_len, &uni); + if (nb < 0) + return -EINVAL; + + in += nb; + in_bound_len -= nb; + + nb = sbi->nls->uni2char(uni, out, out_bound_len); + if (nb < 0) + return nb; + + out += nb; + out_bound_len -= nb; + out_len += nb; + } + + *out = 0; + + return 0; +} + +static struct vboxsf_dir_buf *vboxsf_dir_buf_alloc(struct list_head *list) +{ + struct vboxsf_dir_buf *b; + + b = kmalloc(sizeof(*b), GFP_KERNEL); + if (!b) + return NULL; + + b->buf = kmalloc(DIR_BUFFER_SIZE, GFP_KERNEL); + if (!b->buf) { + kfree(b); + return NULL; + } + + b->entries = 0; + b->used = 0; + b->free = DIR_BUFFER_SIZE; + list_add(&b->head, list); + + return b; +} + +static void vboxsf_dir_buf_free(struct vboxsf_dir_buf *b) +{ + list_del(&b->head); + kfree(b->buf); + kfree(b); +} + +struct vboxsf_dir_info *vboxsf_dir_info_alloc(void) +{ + struct vboxsf_dir_info *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return NULL; + + INIT_LIST_HEAD(&p->info_list); + return p; +} + +void vboxsf_dir_info_free(struct vboxsf_dir_info *p) +{ + struct list_head *list, *pos, *tmp; + + list = &p->info_list; + list_for_each_safe(pos, tmp, list) { + struct vboxsf_dir_buf *b; + + b = list_entry(pos, struct vboxsf_dir_buf, head); + vboxsf_dir_buf_free(b); + } + kfree(p); +} + +int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, + u64 handle) +{ + struct vboxsf_dir_buf *b; + u32 entries, size; + int err = 0; + void *buf; + + /* vboxsf_dirinfo returns 1 on end of dir */ + while (err == 0) { + b = vboxsf_dir_buf_alloc(&sf_d->info_list); + if (!b) { + err = -ENOMEM; + break; + } + + buf = b->buf; + size = b->free; + + err = vboxsf_dirinfo(sbi->root, handle, NULL, 0, 0, + &size, buf, &entries); + if (err < 0) + break; + + b->entries += entries; + b->free -= size; + b->used += size; + } + + if (b && b->used == 0) + vboxsf_dir_buf_free(b); + + /* -EILSEQ means the host could not translate a filename, ignore */ + if (err > 0 || err == -EILSEQ) + err = 0; + + return err; +} diff --git a/fs/vboxsf/vboxsf_wrappers.c b/fs/vboxsf/vboxsf_wrappers.c new file mode 100644 index 000000000000..bfc78a097dae --- /dev/null +++ b/fs/vboxsf/vboxsf_wrappers.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: MIT +/* + * Wrapper functions for the shfl host calls. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vbox_err.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +#define SHFL_REQUEST \ + (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV_OTHER | \ + VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN) + +static u32 vboxsf_client_id; + +int vboxsf_connect(void) +{ + struct vbg_dev *gdev; + struct vmmdev_hgcm_service_location loc; + int err, vbox_status; + + loc.type = VMMDEV_HGCM_LOC_LOCALHOST_EXISTING; + strcpy(loc.u.localhost.service_name, "VBoxSharedFolders"); + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return -ENODEV; /* No guest-device */ + + err = vbg_hgcm_connect(gdev, SHFL_REQUEST, &loc, + &vboxsf_client_id, &vbox_status); + vbg_put_gdev(gdev); + + return err ? err : vbg_status_code_to_errno(vbox_status); +} + +void vboxsf_disconnect(void) +{ + struct vbg_dev *gdev; + int vbox_status; + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return; /* guest-device is gone, already disconnected */ + + vbg_hgcm_disconnect(gdev, SHFL_REQUEST, vboxsf_client_id, &vbox_status); + vbg_put_gdev(gdev); +} + +static int vboxsf_call(u32 function, void *parms, u32 parm_count, int *status) +{ + struct vbg_dev *gdev; + int err, vbox_status; + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return -ESHUTDOWN; /* guest-dev removed underneath us */ + + err = vbg_hgcm_call(gdev, SHFL_REQUEST, vboxsf_client_id, function, + U32_MAX, parms, parm_count, &vbox_status); + vbg_put_gdev(gdev); + + if (err < 0) + return err; + + if (status) + *status = vbox_status; + + return vbg_status_code_to_errno(vbox_status); +} + +int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root) +{ + struct shfl_map_folder parms; + int err, status; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.path.u.pointer.size = shfl_string_buf_size(folder_name); + parms.path.u.pointer.u.linear_addr = (uintptr_t)folder_name; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = 0; + + parms.delimiter.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.delimiter.u.value32 = '/'; + + parms.case_sensitive.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.case_sensitive.u.value32 = 1; + + err = vboxsf_call(SHFL_FN_MAP_FOLDER, &parms, SHFL_CPARMS_MAP_FOLDER, + &status); + if (err == -ENOSYS && status == VERR_NOT_IMPLEMENTED) + vbg_err("%s: Error host is too old\n", __func__); + + *root = parms.root.u.value32; + return err; +} + +int vboxsf_unmap_folder(u32 root) +{ + struct shfl_unmap_folder parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + return vboxsf_call(SHFL_FN_UNMAP_FOLDER, &parms, + SHFL_CPARMS_UNMAP_FOLDER, NULL); +} + +/** + * vboxsf_create - Create a new file or folder + * @root: Root of the shared folder in which to create the file + * @parsed_path: The path of the file or folder relative to the shared folder + * @param: create_parms Parameters for file/folder creation. + * + * Create a new file or folder or open an existing one in a shared folder. + * Note this function always returns 0 / success unless an exceptional condition + * occurs - out of memory, invalid arguments, etc. If the file or folder could + * not be opened or created, create_parms->handle will be set to + * SHFL_HANDLE_NIL on return. In this case the value in create_parms->result + * provides information as to why (e.g. SHFL_FILE_EXISTS), create_parms->result + * is also set on success as additional information. + * + * Returns: + * 0 or negative errno value. + */ +int vboxsf_create(u32 root, struct shfl_string *parsed_path, + struct shfl_createparms *create_parms) +{ + struct shfl_create parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.parms.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.parms.u.pointer.size = sizeof(struct shfl_createparms); + parms.parms.u.pointer.u.linear_addr = (uintptr_t)create_parms; + + return vboxsf_call(SHFL_FN_CREATE, &parms, SHFL_CPARMS_CREATE, NULL); +} + +int vboxsf_close(u32 root, u64 handle) +{ + struct shfl_close parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + + return vboxsf_call(SHFL_FN_CLOSE, &parms, SHFL_CPARMS_CLOSE, NULL); +} + +int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags) +{ + struct shfl_remove parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + + return vboxsf_call(SHFL_FN_REMOVE, &parms, SHFL_CPARMS_REMOVE, NULL); +} + +int vboxsf_rename(u32 root, struct shfl_string *src_path, + struct shfl_string *dest_path, u32 flags) +{ + struct shfl_rename parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.src.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.src.u.pointer.size = shfl_string_buf_size(src_path); + parms.src.u.pointer.u.linear_addr = (uintptr_t)src_path; + + parms.dest.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.dest.u.pointer.size = shfl_string_buf_size(dest_path); + parms.dest.u.pointer.u.linear_addr = (uintptr_t)dest_path; + + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + + return vboxsf_call(SHFL_FN_RENAME, &parms, SHFL_CPARMS_RENAME, NULL); +} + +int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf) +{ + struct shfl_read parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.offset.u.value64 = offset; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_READ, &parms, SHFL_CPARMS_READ, NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf) +{ + struct shfl_write parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.offset.u.value64 = offset; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_WRITE, &parms, SHFL_CPARMS_WRITE, NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +/* Returns 0 on success, 1 on end-of-dir, negative errno otherwise */ +int vboxsf_dirinfo(u32 root, u64 handle, + struct shfl_string *parsed_path, u32 flags, u32 index, + u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count) +{ + struct shfl_list parms; + int err, status; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + if (parsed_path) { + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + } else { + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_IN; + parms.path.u.pointer.size = 0; + parms.path.u.pointer.u.linear_addr = 0; + } + + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + parms.resume_point.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.resume_point.u.value32 = index; + parms.file_count.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.file_count.u.value32 = 0; /* out parameter only */ + + err = vboxsf_call(SHFL_FN_LIST, &parms, SHFL_CPARMS_LIST, &status); + if (err == -ENODATA && status == VERR_NO_MORE_FILES) + err = 1; + + *buf_len = parms.cb.u.value32; + *file_count = parms.file_count.u.value32; + return err; +} + +int vboxsf_fsinfo(u32 root, u64 handle, u32 flags, + u32 *buf_len, void *buf) +{ + struct shfl_information parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.info.u.pointer.size = *buf_len; + parms.info.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_INFORMATION, &parms, SHFL_CPARMS_INFORMATION, + NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +int vboxsf_readlink(u32 root, struct shfl_string *parsed_path, + u32 buf_len, u8 *buf) +{ + struct shfl_readLink parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + return vboxsf_call(SHFL_FN_READLINK, &parms, SHFL_CPARMS_READLINK, + NULL); +} + +int vboxsf_symlink(u32 root, struct shfl_string *new_path, + struct shfl_string *old_path, struct shfl_fsobjinfo *buf) +{ + struct shfl_symlink parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.new_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.new_path.u.pointer.size = shfl_string_buf_size(new_path); + parms.new_path.u.pointer.u.linear_addr = (uintptr_t)new_path; + + parms.old_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.old_path.u.pointer.size = shfl_string_buf_size(old_path); + parms.old_path.u.pointer.u.linear_addr = (uintptr_t)old_path; + + parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.info.u.pointer.size = sizeof(struct shfl_fsobjinfo); + parms.info.u.pointer.u.linear_addr = (uintptr_t)buf; + + return vboxsf_call(SHFL_FN_SYMLINK, &parms, SHFL_CPARMS_SYMLINK, NULL); +} + +int vboxsf_set_utf8(void) +{ + return vboxsf_call(SHFL_FN_SET_UTF8, NULL, 0, NULL); +} + +int vboxsf_set_symlinks(void) +{ + return vboxsf_call(SHFL_FN_SET_SYMLINKS, NULL, 0, NULL); +} diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h new file mode 100644 index 000000000000..18f95b00fc33 --- /dev/null +++ b/fs/vboxsf/vfsmod.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: MIT */ +/* + * VirtualBox Guest Shared Folders support: module header. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#ifndef VFSMOD_H +#define VFSMOD_H + +#include <linux/backing-dev.h> +#include <linux/idr.h> +#include "shfl_hostintf.h" + +#define DIR_BUFFER_SIZE SZ_16K + +/* The cast is to prevent assignment of void * to pointers of arbitrary type */ +#define VBOXSF_SBI(sb) ((struct vboxsf_sbi *)(sb)->s_fs_info) +#define VBOXSF_I(i) container_of(i, struct vboxsf_inode, vfs_inode) + +struct vboxsf_options { + unsigned long ttl; + kuid_t uid; + kgid_t gid; + bool dmode_set; + bool fmode_set; + umode_t dmode; + umode_t fmode; + umode_t dmask; + umode_t fmask; +}; + +struct vboxsf_fs_context { + struct vboxsf_options o; + char *nls_name; +}; + +/* per-shared folder information */ +struct vboxsf_sbi { + struct vboxsf_options o; + struct shfl_fsobjinfo root_info; + struct idr ino_idr; + spinlock_t ino_idr_lock; /* This protects ino_idr */ + struct nls_table *nls; + u32 next_generation; + u32 root; + int bdi_id; +}; + +/* per-inode information */ +struct vboxsf_inode { + /* some information was changed, update data on next revalidate */ + int force_restat; + /* list of open handles for this inode + lock protecting it */ + struct list_head handle_list; + /* This mutex protects handle_list accesses */ + struct mutex handle_list_mutex; + /* The VFS inode struct */ + struct inode vfs_inode; +}; + +struct vboxsf_dir_info { + struct list_head info_list; +}; + +struct vboxsf_dir_buf { + size_t entries; + size_t free; + size_t used; + void *buf; + struct list_head head; +}; + +/* globals */ +extern const struct inode_operations vboxsf_dir_iops; +extern const struct inode_operations vboxsf_lnk_iops; +extern const struct inode_operations vboxsf_reg_iops; +extern const struct file_operations vboxsf_dir_fops; +extern const struct file_operations vboxsf_reg_fops; +extern const struct address_space_operations vboxsf_reg_aops; +extern const struct dentry_operations vboxsf_dentry_ops; + +/* from utils.c */ +struct inode *vboxsf_new_inode(struct super_block *sb); +void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, + const struct shfl_fsobjinfo *info); +int vboxsf_create_at_dentry(struct dentry *dentry, + struct shfl_createparms *params); +int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, + struct shfl_fsobjinfo *info); +int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info); +int vboxsf_inode_revalidate(struct dentry *dentry); +int vboxsf_getattr(const struct path *path, struct kstat *kstat, + u32 request_mask, unsigned int query_flags); +int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr); +struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, + struct dentry *dentry); +int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, + const unsigned char *utf8_name, size_t utf8_len); +struct vboxsf_dir_info *vboxsf_dir_info_alloc(void); +void vboxsf_dir_info_free(struct vboxsf_dir_info *p); +int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, + u64 handle); + +/* from vboxsf_wrappers.c */ +int vboxsf_connect(void); +void vboxsf_disconnect(void); + +int vboxsf_create(u32 root, struct shfl_string *parsed_path, + struct shfl_createparms *create_parms); + +int vboxsf_close(u32 root, u64 handle); +int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags); +int vboxsf_rename(u32 root, struct shfl_string *src_path, + struct shfl_string *dest_path, u32 flags); + +int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf); +int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf); + +int vboxsf_dirinfo(u32 root, u64 handle, + struct shfl_string *parsed_path, u32 flags, u32 index, + u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count); +int vboxsf_fsinfo(u32 root, u64 handle, u32 flags, + u32 *buf_len, void *buf); + +int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root); +int vboxsf_unmap_folder(u32 root); + +int vboxsf_readlink(u32 root, struct shfl_string *parsed_path, + u32 buf_len, u8 *buf); +int vboxsf_symlink(u32 root, struct shfl_string *new_path, + struct shfl_string *old_path, struct shfl_fsobjinfo *buf); + +int vboxsf_set_utf8(void); +int vboxsf_set_symlinks(void); + +#endif diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3a688eb5c5ae..58e937be24ce 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -587,7 +587,7 @@ xfs_dax_writepages( xfs_iflags_clear(ip, XFS_ITRUNCATED); return dax_writeback_mapping_range(mapping, - xfs_inode_buftarg(ip)->bt_bdev, wbc); + xfs_inode_buftarg(ip)->bt_daxdev, wbc); } STATIC sector_t diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 760901783944..2094386af8ac 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -62,7 +62,7 @@ enum { Opt_discard, Opt_nodiscard, Opt_dax, }; -static const struct fs_parameter_spec xfs_param_specs[] = { +static const struct fs_parameter_spec xfs_fs_parameters[] = { fsparam_u32("logbufs", Opt_logbufs), fsparam_string("logbsize", Opt_logbsize), fsparam_string("logdev", Opt_logdev), @@ -106,11 +106,6 @@ static const struct fs_parameter_spec xfs_param_specs[] = { {} }; -static const struct fs_parameter_description xfs_fs_parameters = { - .name = "xfs", - .specs = xfs_param_specs, -}; - struct proc_xfs_info { uint64_t flag; char *str; @@ -1120,7 +1115,7 @@ xfs_fc_parse_param( int size = 0; int opt; - opt = fs_parse(fc, &xfs_fs_parameters, param, &result); + opt = fs_parse(fc, xfs_fs_parameters, param, &result); if (opt < 0) return opt; @@ -1782,7 +1777,7 @@ static struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", .init_fs_context = xfs_init_fs_context, - .parameters = &xfs_fs_parameters, + .parameters = xfs_fs_parameters, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig new file mode 100644 index 000000000000..fb87ad372e29 --- /dev/null +++ b/fs/zonefs/Kconfig @@ -0,0 +1,9 @@ +config ZONEFS_FS + tristate "zonefs filesystem support" + depends on BLOCK + depends on BLK_DEV_ZONED + help + zonefs is a simple file system which exposes zones of a zoned block + device (e.g. host-managed or host-aware SMR disk drives) as files. + + If unsure, say N. diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile new file mode 100644 index 000000000000..75a380aa1ae1 --- /dev/null +++ b/fs/zonefs/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ZONEFS_FS) += zonefs.o + +zonefs-y := super.o diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c new file mode 100644 index 000000000000..8bc6ef82d693 --- /dev/null +++ b/fs/zonefs/super.c @@ -0,0 +1,1439 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple file system for zoned block devices exposing zones as files. + * + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + */ +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/magic.h> +#include <linux/iomap.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/statfs.h> +#include <linux/writeback.h> +#include <linux/quotaops.h> +#include <linux/seq_file.h> +#include <linux/parser.h> +#include <linux/uio.h> +#include <linux/mman.h> +#include <linux/sched/mm.h> +#include <linux/crc32.h> + +#include "zonefs.h" + +static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + + /* All I/Os should always be within the file maximum size */ + if (WARN_ON_ONCE(offset + length > zi->i_max_size)) + return -EIO; + + /* + * Sequential zones can only accept direct writes. This is already + * checked when writes are issued, so warn if we see a page writeback + * operation. + */ + if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && + (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT))) + return -EIO; + + /* + * For conventional zones, all blocks are always mapped. For sequential + * zones, all blocks after always mapped below the inode size (zone + * write pointer) and unwriten beyond. + */ + mutex_lock(&zi->i_truncate_mutex); + isize = i_size_read(inode); + if (offset >= isize) + iomap->type = IOMAP_UNWRITTEN; + else + iomap->type = IOMAP_MAPPED; + if (flags & IOMAP_WRITE) + length = zi->i_max_size - offset; + else + length = min(length, isize - offset); + mutex_unlock(&zi->i_truncate_mutex); + + iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); + iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset; + iomap->bdev = inode->i_sb->s_bdev; + iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; + + return 0; +} + +static const struct iomap_ops zonefs_iomap_ops = { + .iomap_begin = zonefs_iomap_begin, +}; + +static int zonefs_readpage(struct file *unused, struct page *page) +{ + return iomap_readpage(page, &zonefs_iomap_ops); +} + +static int zonefs_readpages(struct file *unused, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + return iomap_readpages(mapping, pages, nr_pages, &zonefs_iomap_ops); +} + +/* + * Map blocks for page writeback. This is used only on conventional zone files, + * which implies that the page range can only be within the fixed inode size. + */ +static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, + struct inode *inode, loff_t offset) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) + return -EIO; + if (WARN_ON_ONCE(offset >= i_size_read(inode))) + return -EIO; + + /* If the mapping is already OK, nothing needs to be done */ + if (offset >= wpc->iomap.offset && + offset < wpc->iomap.offset + wpc->iomap.length) + return 0; + + return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset, + IOMAP_WRITE, &wpc->iomap, NULL); +} + +static const struct iomap_writeback_ops zonefs_writeback_ops = { + .map_blocks = zonefs_map_blocks, +}; + +static int zonefs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct iomap_writepage_ctx wpc = { }; + + return iomap_writepage(page, wbc, &wpc, &zonefs_writeback_ops); +} + +static int zonefs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct iomap_writepage_ctx wpc = { }; + + return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); +} + +static const struct address_space_operations zonefs_file_aops = { + .readpage = zonefs_readpage, + .readpages = zonefs_readpages, + .writepage = zonefs_writepage, + .writepages = zonefs_writepages, + .set_page_dirty = iomap_set_page_dirty, + .releasepage = iomap_releasepage, + .invalidatepage = iomap_invalidatepage, + .migratepage = iomap_migrate_page, + .is_partially_uptodate = iomap_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, + .direct_IO = noop_direct_IO, +}; + +static void zonefs_update_stats(struct inode *inode, loff_t new_isize) +{ + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + loff_t old_isize = i_size_read(inode); + loff_t nr_blocks; + + if (new_isize == old_isize) + return; + + spin_lock(&sbi->s_lock); + + /* + * This may be called for an update after an IO error. + * So beware of the values seen. + */ + if (new_isize < old_isize) { + nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits; + if (sbi->s_used_blocks > nr_blocks) + sbi->s_used_blocks -= nr_blocks; + else + sbi->s_used_blocks = 0; + } else { + sbi->s_used_blocks += + (new_isize - old_isize) >> sb->s_blocksize_bits; + if (sbi->s_used_blocks > sbi->s_blocks) + sbi->s_used_blocks = sbi->s_blocks; + } + + spin_unlock(&sbi->s_lock); +} + +/* + * Check a zone condition and adjust its file inode access permissions for + * offline and readonly zones. Return the inode size corresponding to the + * amount of readable data in the zone. + */ +static loff_t zonefs_check_zone_condition(struct inode *inode, + struct blk_zone *zone, bool warn) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + switch (zone->cond) { + case BLK_ZONE_COND_OFFLINE: + /* + * Dead zone: make the inode immutable, disable all accesses + * and set the file size to 0 (zone wp set to zone start). + */ + if (warn) + zonefs_warn(inode->i_sb, "inode %lu: offline zone\n", + inode->i_ino); + inode->i_flags |= S_IMMUTABLE; + inode->i_mode &= ~0777; + zone->wp = zone->start; + return 0; + case BLK_ZONE_COND_READONLY: + /* Do not allow writes in read-only zones */ + if (warn) + zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", + inode->i_ino); + inode->i_flags |= S_IMMUTABLE; + inode->i_mode &= ~0222; + /* fallthrough */ + default: + if (zi->i_ztype == ZONEFS_ZTYPE_CNV) + return zi->i_max_size; + return (zone->wp - zone->start) << SECTOR_SHIFT; + } +} + +struct zonefs_ioerr_data { + struct inode *inode; + bool write; +}; + +static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct zonefs_ioerr_data *err = data; + struct inode *inode = err->inode; + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + loff_t isize, data_size; + + /* + * Check the zone condition: if the zone is not "bad" (offline or + * read-only), read errors are simply signaled to the IO issuer as long + * as there is no inconsistency between the inode size and the amount of + * data writen in the zone (data_size). + */ + data_size = zonefs_check_zone_condition(inode, zone, true); + isize = i_size_read(inode); + if (zone->cond != BLK_ZONE_COND_OFFLINE && + zone->cond != BLK_ZONE_COND_READONLY && + !err->write && isize == data_size) + return 0; + + /* + * At this point, we detected either a bad zone or an inconsistency + * between the inode size and the amount of data written in the zone. + * For the latter case, the cause may be a write IO error or an external + * action on the device. Two error patterns exist: + * 1) The inode size is lower than the amount of data in the zone: + * a write operation partially failed and data was writen at the end + * of the file. This can happen in the case of a large direct IO + * needing several BIOs and/or write requests to be processed. + * 2) The inode size is larger than the amount of data in the zone: + * this can happen with a deferred write error with the use of the + * device side write cache after getting successful write IO + * completions. Other possibilities are (a) an external corruption, + * e.g. an application reset the zone directly, or (b) the device + * has a serious problem (e.g. firmware bug). + * + * In all cases, warn about inode size inconsistency and handle the + * IO error according to the zone condition and to the mount options. + */ + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size) + zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", + inode->i_ino, isize, data_size); + + /* + * First handle bad zones signaled by hardware. The mount options + * errors=zone-ro and errors=zone-offline result in changing the + * zone condition to read-only and offline respectively, as if the + * condition was signaled by the hardware. + */ + if (zone->cond == BLK_ZONE_COND_OFFLINE || + sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) { + zonefs_warn(sb, "inode %lu: read/write access disabled\n", + inode->i_ino); + if (zone->cond != BLK_ZONE_COND_OFFLINE) { + zone->cond = BLK_ZONE_COND_OFFLINE; + data_size = zonefs_check_zone_condition(inode, zone, + false); + } + } else if (zone->cond == BLK_ZONE_COND_READONLY || + sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) { + zonefs_warn(sb, "inode %lu: write access disabled\n", + inode->i_ino); + if (zone->cond != BLK_ZONE_COND_READONLY) { + zone->cond = BLK_ZONE_COND_READONLY; + data_size = zonefs_check_zone_condition(inode, zone, + false); + } + } + + /* + * If error=remount-ro was specified, any error result in remounting + * the volume as read-only. + */ + if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) { + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + } + + /* + * Update block usage stats and the inode size to prevent access to + * invalid data. + */ + zonefs_update_stats(inode, data_size); + i_size_write(inode, data_size); + zi->i_wpoffset = data_size; + + return 0; +} + +/* + * When an file IO error occurs, check the file zone to see if there is a change + * in the zone condition (e.g. offline or read-only). For a failed write to a + * sequential zone, the zone write pointer position must also be checked to + * eventually correct the file size and zonefs inode write pointer offset + * (which can be out of sync with the drive due to partial write failures). + */ +static void zonefs_io_error(struct inode *inode, bool write) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + unsigned int noio_flag; + unsigned int nr_zones = + zi->i_max_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + struct zonefs_ioerr_data err = { + .inode = inode, + .write = write, + }; + int ret; + + mutex_lock(&zi->i_truncate_mutex); + + /* + * Memory allocations in blkdev_report_zones() can trigger a memory + * reclaim which may in turn cause a recursion into zonefs as well as + * struct request allocations for the same device. The former case may + * end up in a deadlock on the inode truncate mutex, while the latter + * may prevent IO forward progress. Executing the report zones under + * the GFP_NOIO context avoids both problems. + */ + noio_flag = memalloc_noio_save(); + ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones, + zonefs_io_error_cb, &err); + if (ret != nr_zones) + zonefs_err(sb, "Get inode %lu zone information failed %d\n", + inode->i_ino, ret); + memalloc_noio_restore(noio_flag); + + mutex_unlock(&zi->i_truncate_mutex); +} + +static int zonefs_file_truncate(struct inode *inode, loff_t isize) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + loff_t old_isize; + enum req_opf op; + int ret = 0; + + /* + * Only sequential zone files can be truncated and truncation is allowed + * only down to a 0 size, which is equivalent to a zone reset, and to + * the maximum file size, which is equivalent to a zone finish. + */ + if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) + return -EPERM; + + if (!isize) + op = REQ_OP_ZONE_RESET; + else if (isize == zi->i_max_size) + op = REQ_OP_ZONE_FINISH; + else + return -EPERM; + + inode_dio_wait(inode); + + /* Serialize against page faults */ + down_write(&zi->i_mmap_sem); + + /* Serialize against zonefs_iomap_begin() */ + mutex_lock(&zi->i_truncate_mutex); + + old_isize = i_size_read(inode); + if (isize == old_isize) + goto unlock; + + ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, + zi->i_max_size >> SECTOR_SHIFT, GFP_NOFS); + if (ret) { + zonefs_err(inode->i_sb, + "Zone management operation at %llu failed %d", + zi->i_zsector, ret); + goto unlock; + } + + zonefs_update_stats(inode, isize); + truncate_setsize(inode, isize); + zi->i_wpoffset = isize; + +unlock: + mutex_unlock(&zi->i_truncate_mutex); + up_write(&zi->i_mmap_sem); + + return ret; +} + +static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = d_inode(dentry); + int ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + ret = setattr_prepare(dentry, iattr); + if (ret) + return ret; + + /* + * Since files and directories cannot be created nor deleted, do not + * allow setting any write attributes on the sub-directories grouping + * files by zone type. + */ + if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && + (iattr->ia_mode & 0222)) + return -EPERM; + + if (((iattr->ia_valid & ATTR_UID) && + !uid_eq(iattr->ia_uid, inode->i_uid)) || + ((iattr->ia_valid & ATTR_GID) && + !gid_eq(iattr->ia_gid, inode->i_gid))) { + ret = dquot_transfer(inode, iattr); + if (ret) + return ret; + } + + if (iattr->ia_valid & ATTR_SIZE) { + ret = zonefs_file_truncate(inode, iattr->ia_size); + if (ret) + return ret; + } + + setattr_copy(inode, iattr); + + return 0; +} + +static const struct inode_operations zonefs_file_inode_operations = { + .setattr = zonefs_inode_setattr, +}; + +static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct inode *inode = file_inode(file); + int ret = 0; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + /* + * Since only direct writes are allowed in sequential files, page cache + * flush is needed only for conventional zone files. + */ + if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV) + ret = file_write_and_wait_range(file, start, end); + if (!ret) + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + + if (ret) + zonefs_io_error(inode, true); + + return ret; +} + +static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf) +{ + struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file)); + vm_fault_t ret; + + down_read(&zi->i_mmap_sem); + ret = filemap_fault(vmf); + up_read(&zi->i_mmap_sem); + + return ret; +} + +static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + vm_fault_t ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return VM_FAULT_SIGBUS; + + /* + * Sanity check: only conventional zone files can have shared + * writeable mappings. + */ + if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) + return VM_FAULT_NOPAGE; + + sb_start_pagefault(inode->i_sb); + file_update_time(vmf->vma->vm_file); + + /* Serialize against truncates */ + down_read(&zi->i_mmap_sem); + ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); + up_read(&zi->i_mmap_sem); + + sb_end_pagefault(inode->i_sb); + return ret; +} + +static const struct vm_operations_struct zonefs_file_vm_ops = { + .fault = zonefs_filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = zonefs_filemap_page_mkwrite, +}; + +static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* + * Conventional zones accept random writes, so their files can support + * shared writable mappings. For sequential zone files, only read + * mappings are possible since there are no guarantees for write + * ordering between msync() and page cache writeback. + */ + if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ && + (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + return -EINVAL; + + file_accessed(file); + vma->vm_ops = &zonefs_file_vm_ops; + + return 0; +} + +static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) +{ + loff_t isize = i_size_read(file_inode(file)); + + /* + * Seeks are limited to below the zone size for conventional zones + * and below the zone write pointer for sequential zones. In both + * cases, this limit is the inode size. + */ + return generic_file_llseek_size(file, offset, whence, isize, isize); +} + +static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (error) { + zonefs_io_error(inode, true); + return error; + } + + if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) { + /* + * Note that we may be seeing completions out of order, + * but that is not a problem since a write completed + * successfully necessarily means that all preceding writes + * were also successful. So we can safely increase the inode + * size to the write end location. + */ + mutex_lock(&zi->i_truncate_mutex); + if (i_size_read(inode) < iocb->ki_pos + size) { + zonefs_update_stats(inode, iocb->ki_pos + size); + i_size_write(inode, iocb->ki_pos + size); + } + mutex_unlock(&zi->i_truncate_mutex); + } + + return 0; +} + +static const struct iomap_dio_ops zonefs_write_dio_ops = { + .end_io = zonefs_file_write_dio_end_io, +}; + +/* + * Handle direct writes. For sequential zone files, this is the only possible + * write path. For these files, check that the user is issuing writes + * sequentially from the end of the file. This code assumes that the block layer + * delivers write requests to the device in sequential order. This is always the + * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE + * elevator feature is being used (e.g. mq-deadline). The block layer always + * automatically select such an elevator for zoned block devices during the + * device initialization. + */ +static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + size_t count; + ssize_t ret; + + /* + * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT + * as this can cause write reordering (e.g. the first aio gets EAGAIN + * on the inode lock but the second goes through but is now unaligned). + */ + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) + && (iocb->ki_flags & IOCB_NOWAIT)) + iocb->ki_flags &= ~IOCB_NOWAIT; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) + return -EAGAIN; + } else { + inode_lock(inode); + } + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto inode_unlock; + + iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); + count = iov_iter_count(from); + + if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { + ret = -EINVAL; + goto inode_unlock; + } + + /* Enforce sequential writes (append only) in sequential zones */ + mutex_lock(&zi->i_truncate_mutex); + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && iocb->ki_pos != zi->i_wpoffset) { + mutex_unlock(&zi->i_truncate_mutex); + ret = -EINVAL; + goto inode_unlock; + } + mutex_unlock(&zi->i_truncate_mutex); + + ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, + &zonefs_write_dio_ops, is_sync_kiocb(iocb)); + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && + (ret > 0 || ret == -EIOCBQUEUED)) { + if (ret > 0) + count = ret; + mutex_lock(&zi->i_truncate_mutex); + zi->i_wpoffset += count; + mutex_unlock(&zi->i_truncate_mutex); + } + +inode_unlock: + inode_unlock(inode); + + return ret; +} + +static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, + struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + ssize_t ret; + + /* + * Direct IO writes are mandatory for sequential zone files so that the + * write IO issuing order is preserved. + */ + if (zi->i_ztype != ZONEFS_ZTYPE_CNV) + return -EIO; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) + return -EAGAIN; + } else { + inode_lock(inode); + } + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto inode_unlock; + + iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); + + ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); + if (ret > 0) + iocb->ki_pos += ret; + else if (ret == -EIO) + zonefs_io_error(inode, true); + +inode_unlock: + inode_unlock(inode); + if (ret > 0) + ret = generic_write_sync(iocb, ret); + + return ret; +} + +static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (sb_rdonly(inode->i_sb)) + return -EROFS; + + /* Write operations beyond the zone size are not allowed */ + if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size) + return -EFBIG; + + if (iocb->ki_flags & IOCB_DIRECT) + return zonefs_file_dio_write(iocb, from); + + return zonefs_file_buffered_write(iocb, from); +} + +static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + if (error) { + zonefs_io_error(file_inode(iocb->ki_filp), false); + return error; + } + + return 0; +} + +static const struct iomap_dio_ops zonefs_read_dio_ops = { + .end_io = zonefs_file_read_dio_end_io, +}; + +static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + ssize_t ret; + + /* Offline zones cannot be read */ + if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) + return -EPERM; + + if (iocb->ki_pos >= zi->i_max_size) + return 0; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) + return -EAGAIN; + } else { + inode_lock_shared(inode); + } + + /* Limit read operations to written data */ + mutex_lock(&zi->i_truncate_mutex); + isize = i_size_read(inode); + if (iocb->ki_pos >= isize) { + mutex_unlock(&zi->i_truncate_mutex); + ret = 0; + goto inode_unlock; + } + iov_iter_truncate(to, isize - iocb->ki_pos); + mutex_unlock(&zi->i_truncate_mutex); + + if (iocb->ki_flags & IOCB_DIRECT) { + size_t count = iov_iter_count(to); + + if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { + ret = -EINVAL; + goto inode_unlock; + } + file_accessed(iocb->ki_filp); + ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, + &zonefs_read_dio_ops, is_sync_kiocb(iocb)); + } else { + ret = generic_file_read_iter(iocb, to); + if (ret == -EIO) + zonefs_io_error(inode, false); + } + +inode_unlock: + inode_unlock_shared(inode); + + return ret; +} + +static const struct file_operations zonefs_file_operations = { + .open = generic_file_open, + .fsync = zonefs_file_fsync, + .mmap = zonefs_file_mmap, + .llseek = zonefs_file_llseek, + .read_iter = zonefs_file_read_iter, + .write_iter = zonefs_file_write_iter, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, + .iopoll = iomap_dio_iopoll, +}; + +static struct kmem_cache *zonefs_inode_cachep; + +static struct inode *zonefs_alloc_inode(struct super_block *sb) +{ + struct zonefs_inode_info *zi; + + zi = kmem_cache_alloc(zonefs_inode_cachep, GFP_KERNEL); + if (!zi) + return NULL; + + inode_init_once(&zi->i_vnode); + mutex_init(&zi->i_truncate_mutex); + init_rwsem(&zi->i_mmap_sem); + + return &zi->i_vnode; +} + +static void zonefs_free_inode(struct inode *inode) +{ + kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode)); +} + +/* + * File system stat. + */ +static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + enum zonefs_ztype t; + u64 fsid; + + buf->f_type = ZONEFS_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_namelen = ZONEFS_NAME_MAX; + + spin_lock(&sbi->s_lock); + + buf->f_blocks = sbi->s_blocks; + if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks)) + buf->f_bfree = 0; + else + buf->f_bfree = buf->f_blocks - sbi->s_used_blocks; + buf->f_bavail = buf->f_bfree; + + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { + if (sbi->s_nr_files[t]) + buf->f_files += sbi->s_nr_files[t] + 1; + } + buf->f_ffree = 0; + + spin_unlock(&sbi->s_lock); + + fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^ + le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64)); + buf->f_fsid.val[0] = (u32)fsid; + buf->f_fsid.val[1] = (u32)(fsid >> 32); + + return 0; +} + +enum { + Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair, + Opt_err, +}; + +static const match_table_t tokens = { + { Opt_errors_ro, "errors=remount-ro"}, + { Opt_errors_zro, "errors=zone-ro"}, + { Opt_errors_zol, "errors=zone-offline"}, + { Opt_errors_repair, "errors=repair"}, + { Opt_err, NULL} +}; + +static int zonefs_parse_options(struct super_block *sb, char *options) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_errors_ro: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO; + break; + case Opt_errors_zro: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO; + break; + case Opt_errors_zol: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL; + break; + case Opt_errors_repair: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static int zonefs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); + + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) + seq_puts(seq, ",errors=remount-ro"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) + seq_puts(seq, ",errors=zone-ro"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) + seq_puts(seq, ",errors=zone-offline"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) + seq_puts(seq, ",errors=repair"); + + return 0; +} + +static int zonefs_remount(struct super_block *sb, int *flags, char *data) +{ + sync_filesystem(sb); + + return zonefs_parse_options(sb, data); +} + +static const struct super_operations zonefs_sops = { + .alloc_inode = zonefs_alloc_inode, + .free_inode = zonefs_free_inode, + .statfs = zonefs_statfs, + .remount_fs = zonefs_remount, + .show_options = zonefs_show_options, +}; + +static const struct inode_operations zonefs_dir_inode_operations = { + .lookup = simple_lookup, + .setattr = zonefs_inode_setattr, +}; + +static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, + enum zonefs_ztype type) +{ + struct super_block *sb = parent->i_sb; + + inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1; + inode_init_owner(inode, parent, S_IFDIR | 0555); + inode->i_op = &zonefs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + set_nlink(inode, 2); + inc_nlink(parent); +} + +static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + enum zonefs_ztype type) +{ + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; + inode->i_mode = S_IFREG | sbi->s_perm; + + zi->i_ztype = type; + zi->i_zsector = zone->start; + zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, + zone->len << SECTOR_SHIFT); + zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true); + + inode->i_uid = sbi->s_uid; + inode->i_gid = sbi->s_gid; + inode->i_size = zi->i_wpoffset; + inode->i_blocks = zone->len; + + inode->i_op = &zonefs_file_inode_operations; + inode->i_fop = &zonefs_file_operations; + inode->i_mapping->a_ops = &zonefs_file_aops; + + sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); + sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; + sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; +} + +static struct dentry *zonefs_create_inode(struct dentry *parent, + const char *name, struct blk_zone *zone, + enum zonefs_ztype type) +{ + struct inode *dir = d_inode(parent); + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = new_inode(parent->d_sb); + if (!inode) + goto dput; + + inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; + if (zone) + zonefs_init_file_inode(inode, zone, type); + else + zonefs_init_dir_inode(dir, inode, type); + d_add(dentry, inode); + dir->i_size++; + + return dentry; + +dput: + dput(dentry); + + return NULL; +} + +struct zonefs_zone_data { + struct super_block *sb; + unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; + struct blk_zone *zones; +}; + +/* + * Create a zone group and populate it with zone files. + */ +static int zonefs_create_zgroup(struct zonefs_zone_data *zd, + enum zonefs_ztype type) +{ + struct super_block *sb = zd->sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct blk_zone *zone, *next, *end; + const char *zgroup_name; + char *file_name; + struct dentry *dir; + unsigned int n = 0; + int ret = -ENOMEM; + + /* If the group is empty, there is nothing to do */ + if (!zd->nr_zones[type]) + return 0; + + file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); + if (!file_name) + return -ENOMEM; + + if (type == ZONEFS_ZTYPE_CNV) + zgroup_name = "cnv"; + else + zgroup_name = "seq"; + + dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); + if (!dir) + goto free; + + /* + * The first zone contains the super block: skip it. + */ + end = zd->zones + blkdev_nr_zones(sb->s_bdev->bd_disk); + for (zone = &zd->zones[1]; zone < end; zone = next) { + + next = zone + 1; + if (zonefs_zone_type(zone) != type) + continue; + + /* + * For conventional zones, contiguous zones can be aggregated + * together to form larger files. Note that this overwrites the + * length of the first zone of the set of contiguous zones + * aggregated together. If one offline or read-only zone is + * found, assume that all zones aggregated have the same + * condition. + */ + if (type == ZONEFS_ZTYPE_CNV && + (sbi->s_features & ZONEFS_F_AGGRCNV)) { + for (; next < end; next++) { + if (zonefs_zone_type(next) != type) + break; + zone->len += next->len; + if (next->cond == BLK_ZONE_COND_READONLY && + zone->cond != BLK_ZONE_COND_OFFLINE) + zone->cond = BLK_ZONE_COND_READONLY; + else if (next->cond == BLK_ZONE_COND_OFFLINE) + zone->cond = BLK_ZONE_COND_OFFLINE; + } + } + + /* + * Use the file number within its group as file name. + */ + snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); + if (!zonefs_create_inode(dir, file_name, zone, type)) + goto free; + + n++; + } + + zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", + zgroup_name, n, n > 1 ? "s" : ""); + + sbi->s_nr_files[type] = n; + ret = 0; + +free: + kfree(file_name); + + return ret; +} + +static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct zonefs_zone_data *zd = data; + + /* + * Count the number of usable zones: the first zone at index 0 contains + * the super block and is ignored. + */ + switch (zone->type) { + case BLK_ZONE_TYPE_CONVENTIONAL: + zone->wp = zone->start + zone->len; + if (idx) + zd->nr_zones[ZONEFS_ZTYPE_CNV]++; + break; + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: + if (idx) + zd->nr_zones[ZONEFS_ZTYPE_SEQ]++; + break; + default: + zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", + zone->type); + return -EIO; + } + + memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone)); + + return 0; +} + +static int zonefs_get_zone_info(struct zonefs_zone_data *zd) +{ + struct block_device *bdev = zd->sb->s_bdev; + int ret; + + zd->zones = kvcalloc(blkdev_nr_zones(bdev->bd_disk), + sizeof(struct blk_zone), GFP_KERNEL); + if (!zd->zones) + return -ENOMEM; + + /* Get zones information from the device */ + ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, + zonefs_get_zone_info_cb, zd); + if (ret < 0) { + zonefs_err(zd->sb, "Zone report failed %d\n", ret); + return ret; + } + + if (ret != blkdev_nr_zones(bdev->bd_disk)) { + zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", + ret, blkdev_nr_zones(bdev->bd_disk)); + return -EIO; + } + + return 0; +} + +static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd) +{ + kvfree(zd->zones); +} + +/* + * Read super block information from the device. + */ +static int zonefs_read_super(struct super_block *sb) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct zonefs_super *super; + u32 crc, stored_crc; + struct page *page; + struct bio_vec bio_vec; + struct bio bio; + int ret; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + bio_init(&bio, &bio_vec, 1); + bio.bi_iter.bi_sector = 0; + bio.bi_opf = REQ_OP_READ; + bio_set_dev(&bio, sb->s_bdev); + bio_add_page(&bio, page, PAGE_SIZE, 0); + + ret = submit_bio_wait(&bio); + if (ret) + goto free_page; + + super = kmap(page); + + ret = -EINVAL; + if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC) + goto unmap; + + stored_crc = le32_to_cpu(super->s_crc); + super->s_crc = 0; + crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super)); + if (crc != stored_crc) { + zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)", + crc, stored_crc); + goto unmap; + } + + sbi->s_features = le64_to_cpu(super->s_features); + if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) { + zonefs_err(sb, "Unknown features set 0x%llx\n", + sbi->s_features); + goto unmap; + } + + if (sbi->s_features & ZONEFS_F_UID) { + sbi->s_uid = make_kuid(current_user_ns(), + le32_to_cpu(super->s_uid)); + if (!uid_valid(sbi->s_uid)) { + zonefs_err(sb, "Invalid UID feature\n"); + goto unmap; + } + } + + if (sbi->s_features & ZONEFS_F_GID) { + sbi->s_gid = make_kgid(current_user_ns(), + le32_to_cpu(super->s_gid)); + if (!gid_valid(sbi->s_gid)) { + zonefs_err(sb, "Invalid GID feature\n"); + goto unmap; + } + } + + if (sbi->s_features & ZONEFS_F_PERM) + sbi->s_perm = le32_to_cpu(super->s_perm); + + if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) { + zonefs_err(sb, "Reserved area is being used\n"); + goto unmap; + } + + uuid_copy(&sbi->s_uuid, (uuid_t *)super->s_uuid); + ret = 0; + +unmap: + kunmap(page); +free_page: + __free_page(page); + + return ret; +} + +/* + * Check that the device is zoned. If it is, get the list of zones and create + * sub-directories and files according to the device zone configuration and + * format options. + */ +static int zonefs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct zonefs_zone_data zd; + struct zonefs_sb_info *sbi; + struct inode *inode; + enum zonefs_ztype t; + int ret; + + if (!bdev_is_zoned(sb->s_bdev)) { + zonefs_err(sb, "Not a zoned block device\n"); + return -EINVAL; + } + + /* + * Initialize super block information: the maximum file size is updated + * when the zone files are created so that the format option + * ZONEFS_F_AGGRCNV which increases the maximum file size of a file + * beyond the zone size is taken into account. + */ + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + spin_lock_init(&sbi->s_lock); + sb->s_fs_info = sbi; + sb->s_magic = ZONEFS_MAGIC; + sb->s_maxbytes = 0; + sb->s_op = &zonefs_sops; + sb->s_time_gran = 1; + + /* + * The block size is set to the device physical sector size to ensure + * that write operations on 512e devices (512B logical block and 4KB + * physical block) are always aligned to the device physical blocks, + * as mandated by the ZBC/ZAC specifications. + */ + sb_set_blocksize(sb, bdev_physical_block_size(sb->s_bdev)); + sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev)); + sbi->s_uid = GLOBAL_ROOT_UID; + sbi->s_gid = GLOBAL_ROOT_GID; + sbi->s_perm = 0640; + sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; + + ret = zonefs_read_super(sb); + if (ret) + return ret; + + ret = zonefs_parse_options(sb, data); + if (ret) + return ret; + + memset(&zd, 0, sizeof(struct zonefs_zone_data)); + zd.sb = sb; + ret = zonefs_get_zone_info(&zd); + if (ret) + goto cleanup; + + zonefs_info(sb, "Mounting %u zones", + blkdev_nr_zones(sb->s_bdev->bd_disk)); + + /* Create root directory inode */ + ret = -ENOMEM; + inode = new_inode(sb); + if (!inode) + goto cleanup; + + inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk); + inode->i_mode = S_IFDIR | 0555; + inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode); + inode->i_op = &zonefs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + set_nlink(inode, 2); + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + goto cleanup; + + /* Create and populate files in zone groups directories */ + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { + ret = zonefs_create_zgroup(&zd, t); + if (ret) + break; + } + +cleanup: + zonefs_cleanup_zone_info(&zd); + + return ret; +} + +static struct dentry *zonefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super); +} + +static void zonefs_kill_super(struct super_block *sb) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + + if (sb->s_root) + d_genocide(sb->s_root); + kill_block_super(sb); + kfree(sbi); +} + +/* + * File system definition and registration. + */ +static struct file_system_type zonefs_type = { + .owner = THIS_MODULE, + .name = "zonefs", + .mount = zonefs_mount, + .kill_sb = zonefs_kill_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init zonefs_init_inodecache(void) +{ + zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache", + sizeof(struct zonefs_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), + NULL); + if (zonefs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void zonefs_destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy the inode cache. + */ + rcu_barrier(); + kmem_cache_destroy(zonefs_inode_cachep); +} + +static int __init zonefs_init(void) +{ + int ret; + + BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); + + ret = zonefs_init_inodecache(); + if (ret) + return ret; + + ret = register_filesystem(&zonefs_type); + if (ret) { + zonefs_destroy_inodecache(); + return ret; + } + + return 0; +} + +static void __exit zonefs_exit(void) +{ + zonefs_destroy_inodecache(); + unregister_filesystem(&zonefs_type); +} + +MODULE_AUTHOR("Damien Le Moal"); +MODULE_DESCRIPTION("Zone file system for zoned block devices"); +MODULE_LICENSE("GPL"); +module_init(zonefs_init); +module_exit(zonefs_exit); diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h new file mode 100644 index 000000000000..ad17fef7ce91 --- /dev/null +++ b/fs/zonefs/zonefs.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Simple zone file system for zoned block devices. + * + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + */ +#ifndef __ZONEFS_H__ +#define __ZONEFS_H__ + +#include <linux/fs.h> +#include <linux/magic.h> +#include <linux/uuid.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> + +/* + * Maximum length of file names: this only needs to be large enough to fit + * the zone group directory names and a decimal zone number for file names. + * 16 characters is plenty. + */ +#define ZONEFS_NAME_MAX 16 + +/* + * Zone types: ZONEFS_ZTYPE_SEQ is used for all sequential zone types + * defined in linux/blkzoned.h, that is, BLK_ZONE_TYPE_SEQWRITE_REQ and + * BLK_ZONE_TYPE_SEQWRITE_PREF. + */ +enum zonefs_ztype { + ZONEFS_ZTYPE_CNV, + ZONEFS_ZTYPE_SEQ, + ZONEFS_ZTYPE_MAX, +}; + +static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) +{ + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return ZONEFS_ZTYPE_CNV; + return ZONEFS_ZTYPE_SEQ; +} + +/* + * In-memory inode data. + */ +struct zonefs_inode_info { + struct inode i_vnode; + + /* File zone type */ + enum zonefs_ztype i_ztype; + + /* File zone start sector (512B unit) */ + sector_t i_zsector; + + /* File zone write pointer position (sequential zones only) */ + loff_t i_wpoffset; + + /* File maximum size */ + loff_t i_max_size; + + /* + * To serialise fully against both syscall and mmap based IO and + * sequential file truncation, two locks are used. For serializing + * zonefs_seq_file_truncate() against zonefs_iomap_begin(), that is, + * file truncate operations against block mapping, i_truncate_mutex is + * used. i_truncate_mutex also protects against concurrent accesses + * and changes to the inode private data, and in particular changes to + * a sequential file size on completion of direct IO writes. + * Serialization of mmap read IOs with truncate and syscall IO + * operations is done with i_mmap_sem in addition to i_truncate_mutex. + * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first, + * i_truncate_mutex second). + */ + struct mutex i_truncate_mutex; + struct rw_semaphore i_mmap_sem; +}; + +static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) +{ + return container_of(inode, struct zonefs_inode_info, i_vnode); +} + +/* + * On-disk super block (block 0). + */ +#define ZONEFS_LABEL_LEN 64 +#define ZONEFS_UUID_SIZE 16 +#define ZONEFS_SUPER_SIZE 4096 + +struct zonefs_super { + + /* Magic number */ + __le32 s_magic; + + /* Checksum */ + __le32 s_crc; + + /* Volume label */ + char s_label[ZONEFS_LABEL_LEN]; + + /* 128-bit uuid */ + __u8 s_uuid[ZONEFS_UUID_SIZE]; + + /* Features */ + __le64 s_features; + + /* UID/GID to use for files */ + __le32 s_uid; + __le32 s_gid; + + /* File permissions */ + __le32 s_perm; + + /* Padding to ZONEFS_SUPER_SIZE bytes */ + __u8 s_reserved[3988]; + +} __packed; + +/* + * Feature flags: specified in the s_features field of the on-disk super + * block struct zonefs_super and in-memory in the s_feartures field of + * struct zonefs_sb_info. + */ +enum zonefs_features { + /* + * Aggregate contiguous conventional zones into a single file. + */ + ZONEFS_F_AGGRCNV = 1ULL << 0, + /* + * Use super block specified UID for files instead of default 0. + */ + ZONEFS_F_UID = 1ULL << 1, + /* + * Use super block specified GID for files instead of default 0. + */ + ZONEFS_F_GID = 1ULL << 2, + /* + * Use super block specified file permissions instead of default 640. + */ + ZONEFS_F_PERM = 1ULL << 3, +}; + +#define ZONEFS_F_DEFINED_FEATURES \ + (ZONEFS_F_AGGRCNV | ZONEFS_F_UID | ZONEFS_F_GID | ZONEFS_F_PERM) + +/* + * Mount options for zone write pointer error handling. + */ +#define ZONEFS_MNTOPT_ERRORS_RO (1 << 0) /* Make zone file readonly */ +#define ZONEFS_MNTOPT_ERRORS_ZRO (1 << 1) /* Make zone file offline */ +#define ZONEFS_MNTOPT_ERRORS_ZOL (1 << 2) /* Make zone file offline */ +#define ZONEFS_MNTOPT_ERRORS_REPAIR (1 << 3) /* Remount read-only */ +#define ZONEFS_MNTOPT_ERRORS_MASK \ + (ZONEFS_MNTOPT_ERRORS_RO | ZONEFS_MNTOPT_ERRORS_ZRO | \ + ZONEFS_MNTOPT_ERRORS_ZOL | ZONEFS_MNTOPT_ERRORS_REPAIR) + +/* + * In-memory Super block information. + */ +struct zonefs_sb_info { + + unsigned long s_mount_opts; + + spinlock_t s_lock; + + unsigned long long s_features; + kuid_t s_uid; + kgid_t s_gid; + umode_t s_perm; + uuid_t s_uuid; + unsigned int s_zone_sectors_shift; + + unsigned int s_nr_files[ZONEFS_ZTYPE_MAX]; + + loff_t s_blocks; + loff_t s_used_blocks; +}; + +static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +#define zonefs_info(sb, format, args...) \ + pr_info("zonefs (%s): " format, sb->s_id, ## args) +#define zonefs_err(sb, format, args...) \ + pr_err("zonefs (%s) ERROR: " format, sb->s_id, ## args) +#define zonefs_warn(sb, format, args...) \ + pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args) + +#endif |