summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/super.c32
-rw-r--r--fs/cachefiles/rdwr.c27
-rw-r--r--fs/ceph/cache.c4
-rw-r--r--fs/ceph/super.c40
-rw-r--r--fs/cifs/cifs_ioctl.h6
-rw-r--r--fs/cifs/cifsacl.c14
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsglob.h8
-rw-r--r--fs/cifs/cifsproto.h8
-rw-r--r--fs/cifs/cifssmb.c2
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/dir.c5
-rw-r--r--fs/cifs/file.c21
-rw-r--r--fs/cifs/inode.c8
-rw-r--r--fs/cifs/ioctl.c18
-rw-r--r--fs/cifs/link.c18
-rw-r--r--fs/cifs/readdir.c3
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smb1ops.c19
-rw-r--r--fs/cifs/smb2inode.c9
-rw-r--r--fs/cifs/smb2ops.c145
-rw-r--r--fs/cifs/smb2pdu.c36
-rw-r--r--fs/cifs/smb2pdu.h16
-rw-r--r--fs/cifs/smb2proto.h2
-rw-r--r--fs/cifs/smb2transport.c5
-rw-r--r--fs/cifs/trace.h27
-rw-r--r--fs/coredump.c4
-rw-r--r--fs/cramfs/inode.c14
-rw-r--r--fs/dax.c11
-rw-r--r--fs/ecryptfs/mmap.c16
-rw-r--r--fs/ext2/inode.c5
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/f2fs/data.c16
-rw-r--r--fs/filesystems.c3
-rw-r--r--fs/fs_context.c79
-rw-r--r--fs/fs_parser.c447
-rw-r--r--fs/fsopen.c26
-rw-r--r--fs/fuse/cuse.c4
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/file.c21
-rw-r--r--fs/fuse/inode.c39
-rw-r--r--fs/fuse/readdir.c2
-rw-r--r--fs/gfs2/file.c72
-rw-r--r--fs/gfs2/lops.c2
-rw-r--r--fs/gfs2/ops_fstype.c103
-rw-r--r--fs/hugetlbfs/inode.c13
-rw-r--r--fs/inode.c32
-rw-r--r--fs/ioctl.c44
-rw-r--r--fs/jbd2/journal.c21
-rw-r--r--fs/jffs2/super.c26
-rw-r--r--fs/namespace.c49
-rw-r--r--fs/nfs/Kconfig11
-rw-r--r--fs/nfs/Makefile2
-rw-r--r--fs/nfs/callback_xdr.c11
-rw-r--r--fs/nfs/client.c84
-rw-r--r--fs/nfs/delegation.c80
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c83
-rw-r--r--fs/nfs/direct.c7
-rw-r--r--fs/nfs/dns_resolve.c2
-rw-r--r--fs/nfs/file.c49
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c34
-rw-r--r--fs/nfs/fs_context.c1440
-rw-r--r--fs/nfs/fscache.c2
-rw-r--r--fs/nfs/getroot.c73
-rw-r--r--fs/nfs/inode.c10
-rw-r--r--fs/nfs/internal.h143
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/namespace.c146
-rw-r--r--fs/nfs/nfs2xdr.c12
-rw-r--r--fs/nfs/nfs3_fs.h2
-rw-r--r--fs/nfs/nfs3client.c6
-rw-r--r--fs/nfs/nfs3proc.c28
-rw-r--r--fs/nfs/nfs3xdr.c5
-rw-r--r--fs/nfs/nfs42proc.c40
-rw-r--r--fs/nfs/nfs4_fs.h19
-rw-r--r--fs/nfs/nfs4client.c99
-rw-r--r--fs/nfs/nfs4file.c1
-rw-r--r--fs/nfs/nfs4namespace.c298
-rw-r--r--fs/nfs/nfs4proc.c104
-rw-r--r--fs/nfs/nfs4renewd.c5
-rw-r--r--fs/nfs/nfs4state.c7
-rw-r--r--fs/nfs/nfs4super.c257
-rw-r--r--fs/nfs/nfs4trace.c4
-rw-r--r--fs/nfs/nfs4trace.h237
-rw-r--r--fs/nfs/nfs4xdr.c7
-rw-r--r--fs/nfs/nfstrace.h279
-rw-r--r--fs/nfs/pnfs.c4
-rw-r--r--fs/nfs/pnfs.h8
-rw-r--r--fs/nfs/pnfs_nfs.c7
-rw-r--r--fs/nfs/proc.c24
-rw-r--r--fs/nfs/read.c7
-rw-r--r--fs/nfs/super.c1972
-rw-r--r--fs/nfs/write.c32
-rw-r--r--fs/nfsd/Kconfig10
-rw-r--r--fs/nfsd/filecache.c313
-rw-r--r--fs/nfsd/filecache.h7
-rw-r--r--fs/nfsd/netns.h6
-rw-r--r--fs/nfsd/nfs3proc.c5
-rw-r--r--fs/nfsd/nfs3xdr.c36
-rw-r--r--fs/nfsd/nfs4callback.c11
-rw-r--r--fs/nfsd/nfs4layouts.c2
-rw-r--r--fs/nfsd/nfs4proc.c462
-rw-r--r--fs/nfsd/nfs4recover.c8
-rw-r--r--fs/nfsd/nfs4state.c262
-rw-r--r--fs/nfsd/nfs4xdr.c161
-rw-r--r--fs/nfsd/nfsctl.c6
-rw-r--r--fs/nfsd/nfsd.h34
-rw-r--r--fs/nfsd/nfsfh.h9
-rw-r--r--fs/nfsd/nfsproc.c8
-rw-r--r--fs/nfsd/nfssvc.c21
-rw-r--r--fs/nfsd/state.h44
-rw-r--r--fs/nfsd/trace.h22
-rw-r--r--fs/nfsd/vfs.c109
-rw-r--r--fs/nfsd/vfs.h18
-rw-r--r--fs/nfsd/xdr3.h4
-rw-r--r--fs/nfsd/xdr4.h39
-rw-r--r--fs/orangefs/orangefs-debugfs.c1
-rw-r--r--fs/pipe.c67
-rw-r--r--fs/proc/root.c13
-rw-r--r--fs/ramfs/inode.c11
-rw-r--r--fs/splice.c8
-rw-r--r--fs/unicode/Makefile2
-rw-r--r--fs/vboxsf/Kconfig10
-rw-r--r--fs/vboxsf/Makefile5
-rw-r--r--fs/vboxsf/dir.c427
-rw-r--r--fs/vboxsf/file.c379
-rw-r--r--fs/vboxsf/shfl_hostintf.h901
-rw-r--r--fs/vboxsf/super.c491
-rw-r--r--fs/vboxsf/utils.c551
-rw-r--r--fs/vboxsf/vboxsf_wrappers.c371
-rw-r--r--fs/vboxsf/vfsmod.h137
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_super.c11
-rw-r--r--fs/zonefs/Kconfig9
-rw-r--r--fs/zonefs/Makefile4
-rw-r--r--fs/zonefs/super.c1439
-rw-r--r--fs/zonefs/zonefs.h189
140 files changed, 10034 insertions, 3771 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 7b623e9fc1b0..708ba336e689 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig"
source "fs/btrfs/Kconfig"
source "fs/nilfs2/Kconfig"
source "fs/f2fs/Kconfig"
+source "fs/zonefs/Kconfig"
config FS_DAX
bool "Direct Access (DAX) support"
@@ -264,6 +265,7 @@ source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
source "fs/ufs/Kconfig"
source "fs/erofs/Kconfig"
+source "fs/vboxsf/Kconfig"
endif # MISC_FILESYSTEMS
diff --git a/fs/Makefile b/fs/Makefile
index 98be354fdb61..505e51166973 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -133,3 +133,5 @@ obj-$(CONFIG_CEPH_FS) += ceph/
obj-$(CONFIG_PSTORE) += pstore/
obj-$(CONFIG_EFIVAR_FS) += efivarfs/
obj-$(CONFIG_EROFS_FS) += erofs/
+obj-$(CONFIG_VBOXSF_FS) += vboxsf/
+obj-$(CONFIG_ZONEFS_FS) += zonefs/
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7f8a9b3137bf..dda7a9a66848 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -38,13 +38,13 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int afs_show_devname(struct seq_file *m, struct dentry *root);
static int afs_show_options(struct seq_file *m, struct dentry *root);
static int afs_init_fs_context(struct fs_context *fc);
-static const struct fs_parameter_description afs_fs_parameters;
+static const struct fs_parameter_spec afs_fs_parameters[];
struct file_system_type afs_fs_type = {
.owner = THIS_MODULE,
.name = "afs",
.init_fs_context = afs_init_fs_context,
- .parameters = &afs_fs_parameters,
+ .parameters = afs_fs_parameters,
.kill_sb = afs_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE,
};
@@ -73,28 +73,22 @@ enum afs_param {
Opt_source,
};
-static const struct fs_parameter_spec afs_param_specs[] = {
- fsparam_flag ("autocell", Opt_autocell),
- fsparam_flag ("dyn", Opt_dyn),
- fsparam_enum ("flock", Opt_flock),
- fsparam_string("source", Opt_source),
+static const struct constant_table afs_param_flock[] = {
+ {"local", afs_flock_mode_local },
+ {"openafs", afs_flock_mode_openafs },
+ {"strict", afs_flock_mode_strict },
+ {"write", afs_flock_mode_write },
{}
};
-static const struct fs_parameter_enum afs_param_enums[] = {
- { Opt_flock, "local", afs_flock_mode_local },
- { Opt_flock, "openafs", afs_flock_mode_openafs },
- { Opt_flock, "strict", afs_flock_mode_strict },
- { Opt_flock, "write", afs_flock_mode_write },
+static const struct fs_parameter_spec afs_fs_parameters[] = {
+ fsparam_flag ("autocell", Opt_autocell),
+ fsparam_flag ("dyn", Opt_dyn),
+ fsparam_enum ("flock", Opt_flock, afs_param_flock),
+ fsparam_string("source", Opt_source),
{}
};
-static const struct fs_parameter_description afs_fs_parameters = {
- .name = "kAFS",
- .specs = afs_param_specs,
- .enums = afs_param_enums,
-};
-
/*
* initialise the filesystem
*/
@@ -323,7 +317,7 @@ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct afs_fs_context *ctx = fc->fs_private;
int opt;
- opt = fs_parse(fc, &afs_fs_parameters, param, &result);
+ opt = fs_parse(fc, afs_fs_parameters, param, &result);
if (opt < 0)
return opt;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 44a3ce1e4ce4..1dc97f2d6201 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -396,7 +396,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
struct cachefiles_object *object;
struct cachefiles_cache *cache;
struct inode *inode;
- sector_t block0, block;
+ sector_t block;
unsigned shift;
int ret;
@@ -412,7 +412,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
inode = d_backing_inode(object->backer);
ASSERT(S_ISREG(inode->i_mode));
- ASSERT(inode->i_mapping->a_ops->bmap);
ASSERT(inode->i_mapping->a_ops->readpages);
/* calculate the shift required to use bmap */
@@ -428,12 +427,14 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
* enough for this as it doesn't indicate errors, but it's all we've
* got for the moment
*/
- block0 = page->index;
- block0 <<= shift;
+ block = page->index;
+ block <<= shift;
+
+ ret = bmap(inode, &block);
+ ASSERT(ret < 0);
- block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
_debug("%llx -> %llx",
- (unsigned long long) block0,
+ (unsigned long long) (page->index << shift),
(unsigned long long) block);
if (block) {
@@ -711,7 +712,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
inode = d_backing_inode(object->backer);
ASSERT(S_ISREG(inode->i_mode));
- ASSERT(inode->i_mapping->a_ops->bmap);
ASSERT(inode->i_mapping->a_ops->readpages);
/* calculate the shift required to use bmap */
@@ -728,7 +728,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
ret = space ? -ENODATA : -ENOBUFS;
list_for_each_entry_safe(page, _n, pages, lru) {
- sector_t block0, block;
+ sector_t block;
/* we assume the absence or presence of the first block is a
* good enough indication for the page as a whole
@@ -736,13 +736,14 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
* good enough for this as it doesn't indicate errors, but
* it's all we've got for the moment
*/
- block0 = page->index;
- block0 <<= shift;
+ block = page->index;
+ block <<= shift;
+
+ ret = bmap(inode, &block);
+ ASSERT(!ret);
- block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
- block0);
_debug("%llx -> %llx",
- (unsigned long long) block0,
+ (unsigned long long) (page->index << shift),
(unsigned long long) block);
if (block) {
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 73f24f307a4a..270b769607a2 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -67,7 +67,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc)
if (uniq_len && memcmp(ent->uniquifier, fscache_uniq, uniq_len))
continue;
- errorf(fc, "ceph: fscache cookie already registered for fsid %pU, use fsc=<uniquifier> option",
+ errorfc(fc, "fscache cookie already registered for fsid %pU, use fsc=<uniquifier> option",
fsid);
err = -EBUSY;
goto out_unlock;
@@ -96,7 +96,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc)
list_add_tail(&ent->list, &ceph_fscache_list);
} else {
kfree(ent);
- errorf(fc, "ceph: unable to register fscache cookie for fsid %pU",
+ errorfc(fc, "unable to register fscache cookie for fsid %pU",
fsid);
/* all other fs ignore this error */
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index bfb8aead0555..1d9f083b8a11 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -162,13 +162,13 @@ enum ceph_recover_session_mode {
ceph_recover_session_clean
};
-static const struct fs_parameter_enum ceph_mount_param_enums[] = {
- { Opt_recover_session, "no", ceph_recover_session_no },
- { Opt_recover_session, "clean", ceph_recover_session_clean },
+static const struct constant_table ceph_param_recover[] = {
+ { "no", ceph_recover_session_no },
+ { "clean", ceph_recover_session_clean },
{}
};
-static const struct fs_parameter_spec ceph_mount_param_specs[] = {
+static const struct fs_parameter_spec ceph_mount_parameters[] = {
fsparam_flag_no ("acl", Opt_acl),
fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir),
fsparam_s32 ("caps_max", Opt_caps_max),
@@ -178,8 +178,8 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
fsparam_flag_no ("copyfrom", Opt_copyfrom),
fsparam_flag_no ("dcache", Opt_dcache),
fsparam_flag_no ("dirstat", Opt_dirstat),
- __fsparam (fs_param_is_string, "fsc", Opt_fscache,
- fs_param_neg_with_no | fs_param_v_optional),
+ fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc
+ fsparam_string ("fsc", Opt_fscache), // fsc=...
fsparam_flag_no ("ino32", Opt_ino32),
fsparam_string ("mds_namespace", Opt_mds_namespace),
fsparam_flag_no ("poolperm", Opt_poolperm),
@@ -188,7 +188,7 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
fsparam_flag_no ("rbytes", Opt_rbytes),
fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes),
fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries),
- fsparam_enum ("recover_session", Opt_recover_session),
+ fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover),
fsparam_flag_no ("require_active_mds", Opt_require_active_mds),
fsparam_u32 ("rsize", Opt_rsize),
fsparam_string ("snapdirname", Opt_snapdirname),
@@ -197,12 +197,6 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
{}
};
-static const struct fs_parameter_description ceph_mount_parameters = {
- .name = "ceph",
- .specs = ceph_mount_param_specs,
- .enums = ceph_mount_param_enums,
-};
-
struct ceph_parse_opts_ctx {
struct ceph_options *copts;
struct ceph_mount_options *opts;
@@ -226,7 +220,7 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dout("%s '%s'\n", __func__, dev_name);
if (!dev_name || !*dev_name)
- return invalf(fc, "ceph: Empty source");
+ return invalfc(fc, "Empty source");
dev_name_end = strchr(dev_name, '/');
if (dev_name_end) {
@@ -245,14 +239,14 @@ static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
dev_name_end--; /* back up to ':' separator */
if (dev_name_end < dev_name || *dev_name_end != ':')
- return invalf(fc, "ceph: No path or : separator in source");
+ return invalfc(fc, "No path or : separator in source");
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
if (fsopt->server_path)
dout("server path '%s'\n", fsopt->server_path);
ret = ceph_parse_mon_ips(param->string, dev_name_end - dev_name,
- pctx->copts, fc);
+ pctx->copts, fc->log.log);
if (ret)
return ret;
@@ -270,11 +264,11 @@ static int ceph_parse_mount_param(struct fs_context *fc,
unsigned int mode;
int token, ret;
- ret = ceph_parse_param(param, pctx->copts, fc);
+ ret = ceph_parse_param(param, pctx->copts, fc->log.log);
if (ret != -ENOPARAM)
return ret;
- token = fs_parse(fc, &ceph_mount_parameters, param, &result);
+ token = fs_parse(fc, ceph_mount_parameters, param, &result);
dout("%s fs_parse '%s' token %d\n", __func__, param->key, token);
if (token < 0)
return token;
@@ -301,7 +295,7 @@ static int ceph_parse_mount_param(struct fs_context *fc,
break;
case Opt_source:
if (fc->source)
- return invalf(fc, "ceph: Multiple sources specified");
+ return invalfc(fc, "Multiple sources specified");
return ceph_parse_source(param, fc);
case Opt_wsize:
if (result.uint_32 < PAGE_SIZE ||
@@ -392,7 +386,7 @@ static int ceph_parse_mount_param(struct fs_context *fc,
}
break;
#else
- return invalf(fc, "ceph: fscache support is disabled");
+ return invalfc(fc, "fscache support is disabled");
#endif
case Opt_poolperm:
if (!result.negated)
@@ -423,7 +417,7 @@ static int ceph_parse_mount_param(struct fs_context *fc,
#ifdef CONFIG_CEPH_FS_POSIX_ACL
fc->sb_flags |= SB_POSIXACL;
#else
- return invalf(fc, "ceph: POSIX ACL support is disabled");
+ return invalfc(fc, "POSIX ACL support is disabled");
#endif
} else {
fc->sb_flags &= ~SB_POSIXACL;
@@ -435,7 +429,7 @@ static int ceph_parse_mount_param(struct fs_context *fc,
return 0;
out_of_range:
- return invalf(fc, "ceph: %s out of range", param->key);
+ return invalfc(fc, "%s out of range", param->key);
}
static void destroy_mount_options(struct ceph_mount_options *args)
@@ -1101,7 +1095,7 @@ static int ceph_get_tree(struct fs_context *fc)
dout("ceph_get_tree\n");
if (!fc->source)
- return invalf(fc, "ceph: No source");
+ return invalfc(fc, "No source");
#ifdef CONFIG_CEPH_FS_POSIX_ACL
fc->sb_flags |= SB_POSIXACL;
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index 0f0dc1c1fe41..153d5c842a9b 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -65,6 +65,11 @@ struct smb3_key_debug_info {
__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
} __packed;
+struct smb3_notify {
+ __u32 completion_filter;
+ bool watch_tree;
+} __packed;
+
#define CIFS_IOCTL_MAGIC 0xCF
#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
#define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4)
@@ -72,3 +77,4 @@ struct smb3_key_debug_info {
#define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array)
#define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info)
#define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info)
+#define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify)
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index fb41e51dd574..440828afcdde 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1084,7 +1084,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
struct cifs_ntsd *pntsd = NULL;
int oplock = 0;
unsigned int xid;
- int rc, create_options = 0;
+ int rc;
struct cifs_tcon *tcon;
struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
struct cifs_fid fid;
@@ -1096,13 +1096,10 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
tcon = tlink_tcon(tlink);
xid = get_xid();
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = READ_CONTROL;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.disposition = FILE_OPEN;
oparms.path = path;
oparms.fid = &fid;
@@ -1147,7 +1144,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
{
int oplock = 0;
unsigned int xid;
- int rc, access_flags, create_options = 0;
+ int rc, access_flags;
struct cifs_tcon *tcon;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
@@ -1160,9 +1157,6 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
tcon = tlink_tcon(tlink);
xid = get_xid();
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP)
access_flags = WRITE_OWNER;
else
@@ -1171,7 +1165,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = access_flags;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.disposition = FILE_OPEN;
oparms.path = path;
oparms.fid = &fid;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5492b9860baa..febab27cd838 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -275,7 +275,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_ffree = 0; /* unlimited */
if (server->ops->queryfs)
- rc = server->ops->queryfs(xid, tcon, buf);
+ rc = server->ops->queryfs(xid, tcon, cifs_sb, buf);
free_xid(xid);
return 0;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 239338d57086..de82cfa44b1a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -298,7 +298,8 @@ struct smb_version_operations {
const char *, struct dfs_info3_param **,
unsigned int *, const struct nls_table *, int);
/* informational QFS call */
- void (*qfs_tcon)(const unsigned int, struct cifs_tcon *);
+ void (*qfs_tcon)(const unsigned int, struct cifs_tcon *,
+ struct cifs_sb_info *);
/* check if a path is accessible or not */
int (*is_path_accessible)(const unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const char *);
@@ -409,7 +410,7 @@ struct smb_version_operations {
struct cifsInodeInfo *);
/* query remote filesystem */
int (*queryfs)(const unsigned int, struct cifs_tcon *,
- struct kstatfs *);
+ struct cifs_sb_info *, struct kstatfs *);
/* send mandatory brlock to the server */
int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64,
__u64, __u32, int, int, bool);
@@ -430,6 +431,8 @@ struct smb_version_operations {
struct cifsFileInfo *src_file);
int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *src_file, void __user *);
+ int (*notify)(const unsigned int xid, struct file *pfile,
+ void __user *pbuf);
int (*query_mf_symlink)(unsigned int, struct cifs_tcon *,
struct cifs_sb_info *, const unsigned char *,
char *, unsigned int *);
@@ -490,6 +493,7 @@ struct smb_version_operations {
/* ioctl passthrough for query_info */
int (*ioctl_query_info)(const unsigned int xid,
struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
__le16 *path, int is_dir,
unsigned long p);
/* make unix special files (block, char, fifo, socket) */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 948bf3474db1..89eaaf46d1ca 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -612,4 +612,12 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
}
#endif
+static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
+{
+ if (cifs_sb && (backup_cred(cifs_sb)))
+ return options | CREATE_OPEN_BACKUP_INTENT;
+ else
+ return options;
+}
+
#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a481296f417f..3c89569e7210 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -260,7 +260,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
if (server->tcpStatus != CifsNeedReconnect)
break;
- if (--retries)
+ if (retries && --retries)
continue;
/*
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0aa3623ae0e1..a941ac7a659d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -4365,7 +4365,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
/* do not care if a following call succeed - informational */
if (!tcon->pipe && server->ops->qfs_tcon) {
- server->ops->qfs_tcon(*xid, tcon);
+ server->ops->qfs_tcon(*xid, tcon, cifs_sb);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) {
if (tcon->fsDevInfo.DeviceCharacteristics &
cpu_to_le32(FILE_READ_ONLY_DEVICE))
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index f3b79012ff29..0ef099442f20 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -355,13 +355,10 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
if (!tcon->unix_ext && (mode & S_IWUGO) == 0)
create_options |= CREATE_OPTION_READONLY;
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = desired_access;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, create_options);
oparms.disposition = disposition;
oparms.path = full_path;
oparms.fid = fid;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a4e8f7d445ac..bc9516ab4b34 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -222,9 +222,6 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
if (!buf)
return -ENOMEM;
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
/* O_SYNC also has bit for O_DSYNC so following check picks up either */
if (f_flags & O_SYNC)
create_options |= CREATE_WRITE_THROUGH;
@@ -235,7 +232,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = desired_access;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, create_options);
oparms.disposition = disposition;
oparms.path = full_path;
oparms.fid = fid;
@@ -752,9 +749,6 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
desired_access = cifs_convert_flags(cfile->f_flags);
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
/* O_SYNC also has bit for O_DSYNC so following check picks up either */
if (cfile->f_flags & O_SYNC)
create_options |= CREATE_WRITE_THROUGH;
@@ -768,7 +762,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = desired_access;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, create_options);
oparms.disposition = disposition;
oparms.path = full_path;
oparms.fid = &cfile->fid;
@@ -2599,8 +2593,10 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
rc = file_write_and_wait_range(file, start, end);
- if (rc)
+ if (rc) {
+ trace_cifs_fsync_err(inode->i_ino, rc);
return rc;
+ }
xid = get_xid();
@@ -2638,8 +2634,10 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
rc = file_write_and_wait_range(file, start, end);
- if (rc)
+ if (rc) {
+ trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
return rc;
+ }
xid = get_xid();
@@ -2672,7 +2670,8 @@ int cifs_flush(struct file *file, fl_owner_t id)
rc = filemap_write_and_wait(inode->i_mapping);
cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
-
+ if (rc)
+ trace_cifs_flush_err(inode->i_ino, rc);
return rc;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 676e96a7a9f0..9ba623b601ec 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -475,9 +475,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_READ;
- oparms.create_options = CREATE_NOT_DIR;
- if (backup_cred(cifs_sb))
- oparms.create_options |= CREATE_OPEN_BACKUP_INTENT;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR);
oparms.disposition = FILE_OPEN;
oparms.path = path;
oparms.fid = &fid;
@@ -1285,7 +1283,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = DELETE | FILE_WRITE_ATTRIBUTES;
- oparms.create_options = CREATE_NOT_DIR;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR);
oparms.disposition = FILE_OPEN;
oparms.path = full_path;
oparms.fid = &fid;
@@ -1823,7 +1821,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
oparms.cifs_sb = cifs_sb;
/* open the file to be renamed -- we need DELETE perms */
oparms.desired_access = DELETE;
- oparms.create_options = CREATE_NOT_DIR;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR);
oparms.disposition = FILE_OPEN;
oparms.path = from_path;
oparms.fid = &fid;
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 1a01e108d75e..4a73e63c4d43 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -65,7 +65,7 @@ static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
if (tcon->ses->server->ops->ioctl_query_info)
rc = tcon->ses->server->ops->ioctl_query_info(
- xid, tcon, utf16_path,
+ xid, tcon, cifs_sb, utf16_path,
filep->private_data ? 0 : 1, p);
else
rc = -EOPNOTSUPP;
@@ -169,6 +169,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
unsigned int xid;
struct cifsFileInfo *pSMBFile = filep->private_data;
struct cifs_tcon *tcon;
+ struct cifs_sb_info *cifs_sb;
__u64 ExtAttrBits = 0;
__u64 caps;
@@ -299,6 +300,21 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
else
rc = 0;
break;
+ case CIFS_IOC_NOTIFY:
+ if (!S_ISDIR(inode->i_mode)) {
+ /* Notify can only be done on directories */
+ rc = -EOPNOTSUPP;
+ break;
+ }
+ cifs_sb = CIFS_SB(inode->i_sb);
+ tcon = tlink_tcon(cifs_sb_tlink(cifs_sb));
+ if (tcon && tcon->ses->server->ops->notify) {
+ rc = tcon->ses->server->ops->notify(xid,
+ filep, (void __user *)arg);
+ cifs_dbg(FYI, "ioctl notify rc %d\n", rc);
+ } else
+ rc = -EOPNOTSUPP;
+ break;
default:
cifs_dbg(FYI, "unsupported ioctl\n");
break;
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index b736acd3917b..852aa00ec729 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -315,7 +315,7 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_READ;
- oparms.create_options = CREATE_NOT_DIR;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR);
oparms.disposition = FILE_OPEN;
oparms.path = path;
oparms.fid = &fid;
@@ -353,15 +353,11 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid fid;
struct cifs_open_parms oparms;
struct cifs_io_parms io_parms;
- int create_options = CREATE_NOT_DIR;
-
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_WRITE;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR);
oparms.disposition = FILE_CREATE;
oparms.path = path;
oparms.fid = &fid;
@@ -402,9 +398,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_READ;
- oparms.create_options = CREATE_NOT_DIR;
- if (backup_cred(cifs_sb))
- oparms.create_options |= CREATE_OPEN_BACKUP_INTENT;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR);
oparms.disposition = FILE_OPEN;
oparms.fid = &fid;
oparms.reconnect = false;
@@ -457,14 +451,10 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid fid;
struct cifs_open_parms oparms;
struct cifs_io_parms io_parms;
- int create_options = CREATE_NOT_DIR;
__le16 *utf16_path;
__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct kvec iov[2];
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
cifs_dbg(FYI, "%s: path: %s\n", __func__, path);
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
@@ -474,7 +464,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_WRITE;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR);
oparms.disposition = FILE_CREATE;
oparms.fid = &fid;
oparms.reconnect = false;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index d17587c2c4ab..ba9dadf3be24 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -196,7 +196,8 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
* may look wrong since the inodes may not have timed out by the time
* "ls" does a stat() call on them.
*/
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) ||
+ (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID))
fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL &&
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index f0795c856d8f..43a88e26d26b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -101,7 +101,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses)
iface_count = ses->iface_count;
if (iface_count <= 0) {
spin_unlock(&ses->iface_lock);
- cifs_dbg(FYI, "no iface list available to open channels\n");
+ cifs_dbg(VFS, "no iface list available to open channels\n");
return 0;
}
ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces),
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index d70a2bb062df..eb994e313c6a 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -504,7 +504,8 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
}
static void
-cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
+cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb)
{
CIFSSMBQFSDeviceInfo(xid, tcon);
CIFSSMBQFSAttributeInfo(xid, tcon);
@@ -565,7 +566,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = FILE_READ_ATTRIBUTES;
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.disposition = FILE_OPEN;
oparms.path = full_path;
oparms.fid = &fid;
@@ -793,7 +794,7 @@ smb_set_file_info(struct inode *inode, const char *full_path,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = SYNCHRONIZE | FILE_WRITE_ATTRIBUTES;
- oparms.create_options = CREATE_NOT_DIR;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR);
oparms.disposition = FILE_OPEN;
oparms.path = full_path;
oparms.fid = &fid;
@@ -872,7 +873,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
static int
cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
- struct kstatfs *buf)
+ struct cifs_sb_info *cifs_sb, struct kstatfs *buf)
{
int rc = -EOPNOTSUPP;
@@ -970,7 +971,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = FILE_READ_ATTRIBUTES;
- oparms.create_options = OPEN_REPARSE_POINT;
+ oparms.create_options = cifs_create_options(cifs_sb,
+ OPEN_REPARSE_POINT);
oparms.disposition = FILE_OPEN;
oparms.path = full_path;
oparms.fid = &fid;
@@ -1029,7 +1031,6 @@ cifs_make_node(unsigned int xid, struct inode *inode,
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct inode *newinode = NULL;
int rc = -EPERM;
- int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL;
FILE_ALL_INFO *buf = NULL;
struct cifs_io_parms io_parms;
__u32 oplock = 0;
@@ -1090,13 +1091,11 @@ cifs_make_node(unsigned int xid, struct inode *inode,
goto out;
}
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_WRITE;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
+ CREATE_OPTION_SPECIAL);
oparms.disposition = FILE_CREATE;
oparms.path = full_path;
oparms.fid = &fid;
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 5ef5e97a6d13..1cf207564ff9 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -99,9 +99,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.desired_access = desired_access;
oparms.disposition = create_disposition;
- oparms.create_options = create_options;
- if (backup_cred(cifs_sb))
- oparms.create_options |= CREATE_OPEN_BACKUP_INTENT;
+ oparms.create_options = cifs_create_options(cifs_sb, create_options);
oparms.fid = &fid;
oparms.reconnect = false;
oparms.mode = mode;
@@ -457,7 +455,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
/* If it is a root and its handle is cached then use it */
if (!strlen(full_path) && !no_cached_open) {
- rc = open_shroot(xid, tcon, &fid);
+ rc = open_shroot(xid, tcon, cifs_sb, &fid);
if (rc)
goto out;
@@ -474,9 +472,6 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
goto out;
}
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
cifs_get_readable_path(tcon, full_path, &cfile);
rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
FILE_READ_ATTRIBUTES, FILE_OPEN, create_options,
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 6787fce26f20..baa825f4cec0 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -655,7 +655,8 @@ smb2_cached_lease_break(struct work_struct *work)
/*
* Open the directory at the root of a share
*/
-int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
+int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid)
{
struct cifs_ses *ses = tcon->ses;
struct TCP_Server_Info *server = ses->server;
@@ -702,7 +703,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
oparms.tcon = tcon;
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.desired_access = FILE_READ_ATTRIBUTES;
oparms.disposition = FILE_OPEN;
oparms.fid = pfid;
@@ -818,7 +819,8 @@ oshr_free:
}
static void
-smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
+smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb)
{
int rc;
__le16 srch_path = 0; /* Null - open root of share */
@@ -830,7 +832,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
oparms.disposition = FILE_OPEN;
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -838,7 +840,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL,
NULL);
else
- rc = open_shroot(xid, tcon, &fid);
+ rc = open_shroot(xid, tcon, cifs_sb, &fid);
if (rc)
return;
@@ -860,7 +862,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
}
static void
-smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
+smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb)
{
int rc;
__le16 srch_path = 0; /* Null - open root of share */
@@ -871,7 +874,7 @@ smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
oparms.disposition = FILE_OPEN;
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -906,10 +909,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
oparms.disposition = FILE_OPEN;
- if (backup_cred(cifs_sb))
- oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
- else
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -1151,10 +1151,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.desired_access = FILE_WRITE_EA;
oparms.disposition = FILE_OPEN;
- if (backup_cred(cifs_sb))
- oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
- else
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -1422,6 +1419,7 @@ req_res_key_exit:
static int
smb2_ioctl_query_info(const unsigned int xid,
struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
__le16 *path, int is_dir,
unsigned long p)
{
@@ -1447,6 +1445,7 @@ smb2_ioctl_query_info(const unsigned int xid,
struct kvec close_iov[1];
unsigned int size[2];
void *data[2];
+ int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR;
memset(rqst, 0, sizeof(rqst));
resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
@@ -1477,10 +1476,7 @@ smb2_ioctl_query_info(const unsigned int xid,
memset(&oparms, 0, sizeof(oparms));
oparms.tcon = tcon;
oparms.disposition = FILE_OPEN;
- if (is_dir)
- oparms.create_options = CREATE_NOT_FILE;
- else
- oparms.create_options = CREATE_NOT_DIR;
+ oparms.create_options = cifs_create_options(cifs_sb, create_options);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -2049,6 +2045,66 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
+
+
+static int
+smb3_notify(const unsigned int xid, struct file *pfile,
+ void __user *ioc_buf)
+{
+ struct smb3_notify notify;
+ struct dentry *dentry = pfile->f_path.dentry;
+ struct inode *inode = file_inode(pfile);
+ struct cifs_sb_info *cifs_sb;
+ struct cifs_open_parms oparms;
+ struct cifs_fid fid;
+ struct cifs_tcon *tcon;
+ unsigned char *path = NULL;
+ __le16 *utf16_path = NULL;
+ u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ int rc = 0;
+
+ path = build_path_from_dentry(dentry);
+ if (path == NULL)
+ return -ENOMEM;
+
+ cifs_sb = CIFS_SB(inode->i_sb);
+
+ utf16_path = cifs_convert_path_to_utf16(path + 1, cifs_sb);
+ if (utf16_path == NULL) {
+ rc = -ENOMEM;
+ goto notify_exit;
+ }
+
+ if (copy_from_user(&notify, ioc_buf, sizeof(struct smb3_notify))) {
+ rc = -EFAULT;
+ goto notify_exit;
+ }
+
+ tcon = cifs_sb_master_tcon(cifs_sb);
+ oparms.tcon = tcon;
+ oparms.desired_access = FILE_READ_ATTRIBUTES;
+ oparms.disposition = FILE_OPEN;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+
+ rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL);
+ if (rc)
+ goto notify_exit;
+
+ rc = SMB2_change_notify(xid, tcon, fid.persistent_fid, fid.volatile_fid,
+ notify.watch_tree, notify.completion_filter);
+
+ SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+
+ cifs_dbg(FYI, "change notify for path %s rc %d\n", path, rc);
+
+notify_exit:
+ kfree(path);
+ kfree(utf16_path);
+ return rc;
+}
+
static int
smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
const char *path, struct cifs_sb_info *cifs_sb,
@@ -2086,10 +2142,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA;
oparms.disposition = FILE_OPEN;
- if (backup_cred(cifs_sb))
- oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
- else
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = fid;
oparms.reconnect = false;
@@ -2343,10 +2396,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.desired_access = desired_access;
oparms.disposition = FILE_OPEN;
- if (cifs_sb && backup_cred(cifs_sb))
- oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
- else
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -2402,7 +2452,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
static int
smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
- struct kstatfs *buf)
+ struct cifs_sb_info *cifs_sb, struct kstatfs *buf)
{
struct smb2_query_info_rsp *rsp;
struct smb2_fs_full_size_info *info = NULL;
@@ -2417,7 +2467,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
FS_FULL_SIZE_INFORMATION,
SMB2_O_INFO_FILESYSTEM,
sizeof(struct smb2_fs_full_size_info),
- &rsp_iov, &buftype, NULL);
+ &rsp_iov, &buftype, cifs_sb);
if (rc)
goto qfs_exit;
@@ -2439,7 +2489,7 @@ qfs_exit:
static int
smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
- struct kstatfs *buf)
+ struct cifs_sb_info *cifs_sb, struct kstatfs *buf)
{
int rc;
__le16 srch_path = 0; /* Null - open root of share */
@@ -2448,12 +2498,12 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid fid;
if (!tcon->posix_extensions)
- return smb2_queryfs(xid, tcon, buf);
+ return smb2_queryfs(xid, tcon, cifs_sb, buf);
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
oparms.disposition = FILE_OPEN;
- oparms.create_options = 0;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -2722,6 +2772,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
struct smb2_create_rsp *create_rsp;
struct smb2_ioctl_rsp *ioctl_rsp;
struct reparse_data_buffer *reparse_buf;
+ int create_options = is_reparse_point ? OPEN_REPARSE_POINT : 0;
u32 plen;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
@@ -2748,14 +2799,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
oparms.tcon = tcon;
oparms.desired_access = FILE_READ_ATTRIBUTES;
oparms.disposition = FILE_OPEN;
-
- if (backup_cred(cifs_sb))
- oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
- else
- oparms.create_options = 0;
- if (is_reparse_point)
- oparms.create_options = OPEN_REPARSE_POINT;
-
+ oparms.create_options = cifs_create_options(cifs_sb, create_options);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -2934,11 +2978,6 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
tcon = tlink_tcon(tlink);
xid = get_xid();
- if (backup_cred(cifs_sb))
- oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
- else
- oparms.create_options = 0;
-
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path) {
rc = -ENOMEM;
@@ -2949,6 +2988,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
oparms.tcon = tcon;
oparms.desired_access = READ_CONTROL;
oparms.disposition = FILE_OPEN;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.fid = &fid;
oparms.reconnect = false;
@@ -2990,11 +3030,6 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
tcon = tlink_tcon(tlink);
xid = get_xid();
- if (backup_cred(cifs_sb))
- oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
- else
- oparms.create_options = 0;
-
if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP)
access_flags = WRITE_OWNER;
else
@@ -3009,6 +3044,7 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
oparms.tcon = tcon;
oparms.desired_access = access_flags;
+ oparms.create_options = cifs_create_options(cifs_sb, 0);
oparms.disposition = FILE_OPEN;
oparms.path = path;
oparms.fid = &fid;
@@ -4491,7 +4527,6 @@ smb2_make_node(unsigned int xid, struct inode *inode,
{
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
int rc = -EPERM;
- int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL;
FILE_ALL_INFO *buf = NULL;
struct cifs_io_parms io_parms;
__u32 oplock = 0;
@@ -4527,13 +4562,11 @@ smb2_make_node(unsigned int xid, struct inode *inode,
goto out;
}
- if (backup_cred(cifs_sb))
- create_options |= CREATE_OPEN_BACKUP_INTENT;
-
oparms.tcon = tcon;
oparms.cifs_sb = cifs_sb;
oparms.desired_access = GENERIC_WRITE;
- oparms.create_options = create_options;
+ oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
+ CREATE_OPTION_SPECIAL);
oparms.disposition = FILE_CREATE;
oparms.path = full_path;
oparms.fid = &fid;
@@ -4868,6 +4901,7 @@ struct smb_version_operations smb30_operations = {
.dir_needs_close = smb2_dir_needs_close,
.fallocate = smb3_fallocate,
.enum_snapshots = smb3_enum_snapshots,
+ .notify = smb3_notify,
.init_transform_rq = smb3_init_transform_rq,
.is_transform_hdr = smb3_is_transform_hdr,
.receive_transform = smb3_receive_transform,
@@ -4978,6 +5012,7 @@ struct smb_version_operations smb311_operations = {
.dir_needs_close = smb2_dir_needs_close,
.fallocate = smb3_fallocate,
.enum_snapshots = smb3_enum_snapshots,
+ .notify = smb3_notify,
.init_transform_rq = smb3_init_transform_rq,
.is_transform_hdr = smb3_is_transform_hdr,
.receive_transform = smb3_receive_transform,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 14f209f7376f..1234f9ccab03 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -350,9 +350,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
}
rc = cifs_negotiate_protocol(0, tcon->ses);
- if (!rc && tcon->ses->need_reconnect)
+ if (!rc && tcon->ses->need_reconnect) {
rc = cifs_setup_session(0, tcon->ses, nls_codepage);
-
+ if ((rc == -EACCES) && !tcon->retry) {
+ rc = -EHOSTDOWN;
+ mutex_unlock(&tcon->ses->session_mutex);
+ goto failed;
+ }
+ }
if (rc || !tcon->need_reconnect) {
mutex_unlock(&tcon->ses->session_mutex);
goto out;
@@ -397,6 +402,7 @@ out:
case SMB2_SET_INFO:
rc = -EAGAIN;
}
+failed:
unload_nls(nls_codepage);
return rc;
}
@@ -1933,6 +1939,16 @@ parse_query_id_ctxt(struct create_context *cc, struct smb2_file_all_info *buf)
buf->IndexNumber = pdisk_id->DiskFileId;
}
+static void
+parse_posix_ctxt(struct create_context *cc, struct smb_posix_info *pposix_inf)
+{
+ /* struct smb_posix_info *ppinf = (struct smb_posix_info *)cc; */
+
+ /* TODO: Need to add parsing for the context and return */
+ printk_once(KERN_WARNING
+ "SMB3 3.11 POSIX response context not completed yet\n");
+}
+
void
smb2_parse_contexts(struct TCP_Server_Info *server,
struct smb2_create_rsp *rsp,
@@ -1944,6 +1960,9 @@ smb2_parse_contexts(struct TCP_Server_Info *server,
unsigned int next;
unsigned int remaining;
char *name;
+ const char smb3_create_tag_posix[] = {0x93, 0xAD, 0x25, 0x50, 0x9C,
+ 0xB4, 0x11, 0xE7, 0xB4, 0x23, 0x83,
+ 0xDE, 0x96, 0x8B, 0xCD, 0x7C};
*oplock = 0;
data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
@@ -1963,6 +1982,15 @@ smb2_parse_contexts(struct TCP_Server_Info *server,
else if (buf && (le16_to_cpu(cc->NameLength) == 4) &&
strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0)
parse_query_id_ctxt(cc, buf);
+ else if ((le16_to_cpu(cc->NameLength) == 16)) {
+ if (memcmp(name, smb3_create_tag_posix, 16) == 0)
+ parse_posix_ctxt(cc, NULL);
+ }
+ /* else {
+ cifs_dbg(FYI, "Context not matched with len %d\n",
+ le16_to_cpu(cc->NameLength));
+ cifs_dump_mem("Cctxt name: ", name, 4);
+ } */
next = le32_to_cpu(cc->Next);
if (!next)
@@ -3357,6 +3385,7 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst,
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
+ /* See note 354 of MS-SMB2, 64K max */
req->OutputBufferLength =
cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE);
req->CompletionFilter = cpu_to_le32(completion_filter);
@@ -4023,6 +4052,9 @@ smb2_writev_callback(struct mid_q_entry *mid)
wdata->cfile->fid.persistent_fid,
tcon->tid, tcon->ses->Suid, wdata->offset,
wdata->bytes, wdata->result);
+ if (wdata->result == -ENOSPC)
+ printk_once(KERN_WARNING "Out of space writing to %s\n",
+ tcon->treeName);
} else
trace_smb3_write_done(0 /* no xid */,
wdata->cfile->fid.persistent_fid,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 4c43dbd1e089..fa03df130f1a 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -1519,6 +1519,7 @@ struct smb3_fs_vol_info {
#define FILE_NORMALIZED_NAME_INFORMATION 48
#define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50
#define FILE_STANDARD_LINK_INFORMATION 54
+#define FILE_ID_INFORMATION 59
struct smb2_file_internal_info {
__le64 IndexNumber;
@@ -1593,6 +1594,21 @@ struct smb2_file_network_open_info {
__le32 Reserved;
} __packed; /* level 34 Query also similar returned in close rsp and open rsp */
+/* See MS-FSCC 2.4.43 */
+struct smb2_file_id_information {
+ __le64 VolumeSerialNumber;
+ __u64 PersistentFileId; /* opaque endianness */
+ __u64 VolatileFileId; /* opaque endianness */
+} __packed; /* level 59 */
+
extern char smb2_padding[7];
+/* equivalent of the contents of SMB3.1.1 POSIX open context response */
+struct smb_posix_info {
+ __le32 nlink;
+ __le32 reparse_tag;
+ __le32 mode;
+ kuid_t uid;
+ kuid_t gid;
+};
#endif /* _SMB2PDU_H */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 6c678e00046f..de6388ef344f 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -68,7 +68,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server,
struct mid_q_entry *mid);
extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_fid *pfid);
+ struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid);
extern void close_shroot(struct cached_fid *cfid);
extern void close_shroot_lease(struct cached_fid *cfid);
extern void close_shroot_lease_locked(struct cached_fid *cfid);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index fe6acfce3390..08b703b7a15e 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -104,13 +104,14 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
{
struct cifs_chan *chan;
struct cifs_ses *ses = NULL;
+ struct TCP_Server_Info *it = NULL;
int i;
int rc = 0;
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
- list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) {
+ list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) {
if (ses->Suid == ses_id)
goto found;
}
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index e7e350b13d6a..4cb0d5f7ce45 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -547,6 +547,33 @@ DEFINE_EVENT(smb3_exit_err_class, smb3_##name, \
DEFINE_SMB3_EXIT_ERR_EVENT(exit_err);
+
+DECLARE_EVENT_CLASS(smb3_sync_err_class,
+ TP_PROTO(unsigned long ino,
+ int rc),
+ TP_ARGS(ino, rc),
+ TP_STRUCT__entry(
+ __field(unsigned long, ino)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->ino = ino;
+ __entry->rc = rc;
+ ),
+ TP_printk("\tino=%lu rc=%d",
+ __entry->ino, __entry->rc)
+)
+
+#define DEFINE_SMB3_SYNC_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_sync_err_class, cifs_##name, \
+ TP_PROTO(unsigned long ino, \
+ int rc), \
+ TP_ARGS(ino, rc))
+
+DEFINE_SMB3_SYNC_ERR_EVENT(fsync_err);
+DEFINE_SMB3_SYNC_ERR_EVENT(flush_err);
+
+
DECLARE_EVENT_CLASS(smb3_enter_exit_class,
TP_PROTO(unsigned int xid,
const char *func_name),
diff --git a/fs/coredump.c b/fs/coredump.c
index b1ea7dfbd149..f8296a82d01d 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -517,7 +517,7 @@ static void wait_for_dump_helpers(struct file *file)
pipe_lock(pipe);
pipe->readers++;
pipe->writers--;
- wake_up_interruptible_sync(&pipe->wait);
+ wake_up_interruptible_sync(&pipe->rd_wait);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
pipe_unlock(pipe);
@@ -525,7 +525,7 @@ static void wait_for_dump_helpers(struct file *file)
* We actually want wait_event_freezable() but then we need
* to clear TIF_SIGPENDING and improve dump_interrupted().
*/
- wait_event_interruptible(pipe->wait, pipe->readers == 1);
+ wait_event_interruptible(pipe->rd_wait, pipe->readers == 1);
pipe_lock(pipe);
pipe->readers--;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 2f04024c3588..912308600d39 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -534,7 +534,7 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc,
/* check for wrong endianness */
if (super->magic == CRAMFS_MAGIC_WEND) {
if (!silent)
- errorf(fc, "cramfs: wrong endianness");
+ errorfc(fc, "wrong endianness");
return -EINVAL;
}
@@ -546,22 +546,22 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc,
mutex_unlock(&read_mutex);
if (super->magic != CRAMFS_MAGIC) {
if (super->magic == CRAMFS_MAGIC_WEND && !silent)
- errorf(fc, "cramfs: wrong endianness");
+ errorfc(fc, "wrong endianness");
else if (!silent)
- errorf(fc, "cramfs: wrong magic");
+ errorfc(fc, "wrong magic");
return -EINVAL;
}
}
/* get feature flags first */
if (super->flags & ~CRAMFS_SUPPORTED_FLAGS) {
- errorf(fc, "cramfs: unsupported filesystem features");
+ errorfc(fc, "unsupported filesystem features");
return -EINVAL;
}
/* Check that the root inode is in a sane state */
if (!S_ISDIR(super->root.mode)) {
- errorf(fc, "cramfs: root is not a directory");
+ errorfc(fc, "root is not a directory");
return -EINVAL;
}
/* correct strange, hard-coded permissions of mkcramfs */
@@ -580,12 +580,12 @@ static int cramfs_read_super(struct super_block *sb, struct fs_context *fc,
sbi->magic = super->magic;
sbi->flags = super->flags;
if (root_offset == 0)
- infof(fc, "cramfs: empty filesystem");
+ infofc(fc, "empty filesystem");
else if (!(super->flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) &&
((root_offset != sizeof(struct cramfs_super)) &&
(root_offset != 512 + sizeof(struct cramfs_super))))
{
- errorf(fc, "cramfs: bad root offset %lu", root_offset);
+ errorfc(fc, "bad root offset %lu", root_offset);
return -EINVAL;
}
diff --git a/fs/dax.c b/fs/dax.c
index 1f1f0201cad1..35da144375a0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -937,12 +937,11 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
* on persistent storage prior to completion of the operation.
*/
int dax_writeback_mapping_range(struct address_space *mapping,
- struct block_device *bdev, struct writeback_control *wbc)
+ struct dax_device *dax_dev, struct writeback_control *wbc)
{
XA_STATE(xas, &mapping->i_pages, wbc->range_start >> PAGE_SHIFT);
struct inode *inode = mapping->host;
pgoff_t end_index = wbc->range_end >> PAGE_SHIFT;
- struct dax_device *dax_dev;
void *entry;
int ret = 0;
unsigned int scanned = 0;
@@ -953,10 +952,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
return 0;
- dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
- if (!dax_dev)
- return -EIO;
-
trace_dax_writeback_range(inode, xas.xa_index, end_index);
tag_pages_for_writeback(mapping, xas.xa_index, end_index);
@@ -977,7 +972,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
xas_lock_irq(&xas);
}
xas_unlock_irq(&xas);
- put_dax(dax_dev);
trace_dax_writeback_range_done(inode, xas.xa_index, end_index);
return ret;
}
@@ -1207,6 +1201,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
lockdep_assert_held(&inode->i_rwsem);
}
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ flags |= IOMAP_NOWAIT;
+
while (iov_iter_count(iter)) {
ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
iter, dax_iomap_actor);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index cffa0c1ec829..019572c6b39a 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -524,16 +524,12 @@ out:
static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
{
- int rc = 0;
- struct inode *inode;
- struct inode *lower_inode;
-
- inode = (struct inode *)mapping->host;
- lower_inode = ecryptfs_inode_to_lower(inode);
- if (lower_inode->i_mapping->a_ops->bmap)
- rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping,
- block);
- return rc;
+ struct inode *lower_inode = ecryptfs_inode_to_lower(mapping->host);
+ int ret = bmap(lower_inode, &block);
+
+ if (ret)
+ return 0;
+ return block;
}
const struct address_space_operations ecryptfs_aops = {
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 119667e65890..c885cf7d724b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -960,8 +960,9 @@ ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
static int
ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
- return dax_writeback_mapping_range(mapping,
- mapping->host->i_sb->s_bdev, wbc);
+ struct ext2_sb_info *sbi = EXT2_SB(mapping->host->i_sb);
+
+ return dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
}
const struct address_space_operations ext2_aops = {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3313168b680f..1305b810c44a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2867,7 +2867,7 @@ static int ext4_dax_writepages(struct address_space *mapping,
percpu_down_read(&sbi->s_journal_flag_rwsem);
trace_ext4_writepages(inode, wbc);
- ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc);
+ ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
percpu_up_read(&sbi->s_journal_flag_rwsem);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8bd9afa81c54..b27b72107911 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3666,12 +3666,16 @@ static int check_swap_activate(struct swap_info_struct *sis,
page_no < sis->max) {
unsigned block_in_page;
sector_t first_block;
+ sector_t block = 0;
+ int err = 0;
cond_resched();
- first_block = bmap(inode, probe_block);
- if (first_block == 0)
+ block = probe_block;
+ err = bmap(inode, &block);
+ if (err || !block)
goto bad_bmap;
+ first_block = block;
/*
* It must be PAGE_SIZE aligned on-disk
@@ -3683,11 +3687,13 @@ static int check_swap_activate(struct swap_info_struct *sis,
for (block_in_page = 1; block_in_page < blocks_per_page;
block_in_page++) {
- sector_t block;
- block = bmap(inode, probe_block + block_in_page);
- if (block == 0)
+ block = probe_block + block_in_page;
+ err = bmap(inode, &block);
+
+ if (err || !block)
goto bad_bmap;
+
if (block != first_block + block_in_page) {
/* Discontiguity */
probe_block++;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 9135646e41ac..77bf5f95362d 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -74,7 +74,8 @@ int register_filesystem(struct file_system_type * fs)
int res = 0;
struct file_system_type ** p;
- if (fs->parameters && !fs_validate_description(fs->parameters))
+ if (fs->parameters &&
+ !fs_validate_description(fs->name, fs->parameters))
return -EINVAL;
BUG_ON(strchr(fs->name, '.'));
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 138b5b4d621d..fc9f6ef93b55 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -45,6 +45,7 @@ static const struct constant_table common_set_sb_flag[] = {
{ "posixacl", SB_POSIXACL },
{ "ro", SB_RDONLY },
{ "sync", SB_SYNCHRONOUS },
+ { },
};
static const struct constant_table common_clear_sb_flag[] = {
@@ -53,6 +54,7 @@ static const struct constant_table common_clear_sb_flag[] = {
{ "nomand", SB_MANDLOCK },
{ "rw", SB_RDONLY },
{ "silent", SB_SILENT },
+ { },
};
static const char *const forbidden_sb_flag[] = {
@@ -175,14 +177,15 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key,
struct fs_parameter param = {
.key = key,
- .type = fs_value_is_string,
+ .type = fs_value_is_flag,
.size = v_size,
};
- if (v_size > 0) {
+ if (value) {
param.string = kmemdup_nul(value, v_size, GFP_KERNEL);
if (!param.string)
return -ENOMEM;
+ param.type = fs_value_is_string;
}
ret = vfs_parse_fs_param(fc, &param);
@@ -268,6 +271,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
fc->fs_type = get_filesystem(fs_type);
fc->cred = get_current_cred();
fc->net_ns = get_net(current->nsproxy->net_ns);
+ fc->log.prefix = fs_type->name;
mutex_init(&fc->uapi_mutex);
@@ -361,8 +365,8 @@ struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
get_net(fc->net_ns);
get_user_ns(fc->user_ns);
get_cred(fc->cred);
- if (fc->log)
- refcount_inc(&fc->log->usage);
+ if (fc->log.log)
+ refcount_inc(&fc->log.log->usage);
/* Can't call put until we've called ->dup */
ret = fc->ops->dup(fc, src_fc);
@@ -385,64 +389,33 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
* @fc: The filesystem context to log to.
* @fmt: The format of the buffer.
*/
-void logfc(struct fs_context *fc, const char *fmt, ...)
+void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...)
{
- static const char store_failure[] = "OOM: Can't store error string";
- struct fc_log *log = fc ? fc->log : NULL;
- const char *p;
va_list va;
- char *q;
- u8 freeable;
+ struct va_format vaf = {.fmt = fmt, .va = &va};
va_start(va, fmt);
- if (!strchr(fmt, '%')) {
- p = fmt;
- goto unformatted_string;
- }
- if (strcmp(fmt, "%s") == 0) {
- p = va_arg(va, const char *);
- goto unformatted_string;
- }
-
- q = kvasprintf(GFP_KERNEL, fmt, va);
-copied_string:
- if (!q)
- goto store_failure;
- freeable = 1;
- goto store_string;
-
-unformatted_string:
- if ((unsigned long)p >= (unsigned long)__start_rodata &&
- (unsigned long)p < (unsigned long)__end_rodata)
- goto const_string;
- if (log && within_module_core((unsigned long)p, log->owner))
- goto const_string;
- q = kstrdup(p, GFP_KERNEL);
- goto copied_string;
-
-store_failure:
- p = store_failure;
-const_string:
- q = (char *)p;
- freeable = 0;
-store_string:
if (!log) {
- switch (fmt[0]) {
+ switch (level) {
case 'w':
- printk(KERN_WARNING "%s\n", q + 2);
+ printk(KERN_WARNING "%s%s%pV\n", prefix ? prefix : "",
+ prefix ? ": " : "", &vaf);
break;
case 'e':
- printk(KERN_ERR "%s\n", q + 2);
+ printk(KERN_ERR "%s%s%pV\n", prefix ? prefix : "",
+ prefix ? ": " : "", &vaf);
break;
default:
- printk(KERN_NOTICE "%s\n", q + 2);
+ printk(KERN_NOTICE "%s%s%pV\n", prefix ? prefix : "",
+ prefix ? ": " : "", &vaf);
break;
}
- if (freeable)
- kfree(q);
} else {
unsigned int logsize = ARRAY_SIZE(log->buffer);
u8 index;
+ char *q = kasprintf(GFP_KERNEL, "%c %s%s%pV\n", level,
+ prefix ? prefix : "",
+ prefix ? ": " : "", &vaf);
index = log->head & (logsize - 1);
BUILD_BUG_ON(sizeof(log->head) != sizeof(u8) ||
@@ -454,9 +427,11 @@ store_string:
log->tail++;
}
- log->buffer[index] = q;
- log->need_free &= ~(1 << index);
- log->need_free |= freeable << index;
+ log->buffer[index] = q ? q : "OOM: Can't store error string";
+ if (q)
+ log->need_free |= 1 << index;
+ else
+ log->need_free &= ~(1 << index);
log->head++;
}
va_end(va);
@@ -468,12 +443,12 @@ EXPORT_SYMBOL(logfc);
*/
static void put_fc_log(struct fs_context *fc)
{
- struct fc_log *log = fc->log;
+ struct fc_log *log = fc->log.log;
int i;
if (log) {
if (refcount_dec_and_test(&log->usage)) {
- fc->log = NULL;
+ fc->log.log = NULL;
for (i = 0; i <= 7; i++)
if (log->need_free & (1 << i))
kfree(log->buffer[i]);
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index d1930adce68d..7e6fb43f9541 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -20,42 +20,66 @@ static const struct constant_table bool_names[] = {
{ "no", false },
{ "true", true },
{ "yes", true },
+ { },
};
+static const struct constant_table *
+__lookup_constant(const struct constant_table *tbl, const char *name)
+{
+ for ( ; tbl->name; tbl++)
+ if (strcmp(name, tbl->name) == 0)
+ return tbl;
+ return NULL;
+}
+
/**
* lookup_constant - Look up a constant by name in an ordered table
* @tbl: The table of constants to search.
- * @tbl_size: The size of the table.
* @name: The name to look up.
* @not_found: The value to return if the name is not found.
*/
-int __lookup_constant(const struct constant_table *tbl, size_t tbl_size,
- const char *name, int not_found)
+int lookup_constant(const struct constant_table *tbl, const char *name, int not_found)
{
- unsigned int i;
+ const struct constant_table *p = __lookup_constant(tbl, name);
- for (i = 0; i < tbl_size; i++)
- if (strcmp(name, tbl[i].name) == 0)
- return tbl[i].value;
+ return p ? p->value : not_found;
+}
+EXPORT_SYMBOL(lookup_constant);
- return not_found;
+static inline bool is_flag(const struct fs_parameter_spec *p)
+{
+ return p->type == NULL;
}
-EXPORT_SYMBOL(__lookup_constant);
static const struct fs_parameter_spec *fs_lookup_key(
- const struct fs_parameter_description *desc,
- const char *name)
+ const struct fs_parameter_spec *desc,
+ struct fs_parameter *param, bool *negated)
{
- const struct fs_parameter_spec *p;
-
- if (!desc->specs)
- return NULL;
-
- for (p = desc->specs; p->name; p++)
- if (strcmp(p->name, name) == 0)
+ const struct fs_parameter_spec *p, *other = NULL;
+ const char *name = param->key;
+ bool want_flag = param->type == fs_value_is_flag;
+
+ *negated = false;
+ for (p = desc; p->name; p++) {
+ if (strcmp(p->name, name) != 0)
+ continue;
+ if (likely(is_flag(p) == want_flag))
return p;
-
- return NULL;
+ other = p;
+ }
+ if (want_flag) {
+ if (name[0] == 'n' && name[1] == 'o' && name[2]) {
+ for (p = desc; p->name; p++) {
+ if (strcmp(p->name, name + 2) != 0)
+ continue;
+ if (!(p->flags & fs_param_neg_with_no))
+ continue;
+ *negated = true;
+ return p;
+ }
+ }
+ }
+ return other;
}
/*
@@ -76,172 +100,38 @@ static const struct fs_parameter_spec *fs_lookup_key(
* unknown parameters are okay and -EINVAL if there was a conversion issue or
* the parameter wasn't recognised and unknowns aren't okay.
*/
-int fs_parse(struct fs_context *fc,
- const struct fs_parameter_description *desc,
+int __fs_parse(struct p_log *log,
+ const struct fs_parameter_spec *desc,
struct fs_parameter *param,
struct fs_parse_result *result)
{
const struct fs_parameter_spec *p;
- const struct fs_parameter_enum *e;
- int ret = -ENOPARAM, b;
- result->has_value = !!param->string;
- result->negated = false;
result->uint_64 = 0;
- p = fs_lookup_key(desc, param->key);
- if (!p) {
- /* If we didn't find something that looks like "noxxx", see if
- * "xxx" takes the "no"-form negative - but only if there
- * wasn't an value.
- */
- if (result->has_value)
- goto unknown_parameter;
- if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2])
- goto unknown_parameter;
-
- p = fs_lookup_key(desc, param->key + 2);
- if (!p)
- goto unknown_parameter;
- if (!(p->flags & fs_param_neg_with_no))
- goto unknown_parameter;
- result->boolean = false;
- result->negated = true;
- }
+ p = fs_lookup_key(desc, param, &result->negated);
+ if (!p)
+ return -ENOPARAM;
if (p->flags & fs_param_deprecated)
- warnf(fc, "%s: Deprecated parameter '%s'",
- desc->name, param->key);
-
- if (result->negated)
- goto okay;
-
- /* Certain parameter types only take a string and convert it. */
- switch (p->type) {
- case __fs_param_wasnt_defined:
- return -EINVAL;
- case fs_param_is_u32:
- case fs_param_is_u32_octal:
- case fs_param_is_u32_hex:
- case fs_param_is_s32:
- case fs_param_is_u64:
- case fs_param_is_enum:
- case fs_param_is_string:
- if (param->type != fs_value_is_string)
- goto bad_value;
- if (!result->has_value) {
- if (p->flags & fs_param_v_optional)
- goto okay;
- goto bad_value;
- }
- /* Fall through */
- default:
- break;
- }
+ warn_plog(log, "Deprecated parameter '%s'", param->key);
/* Try to turn the type we were given into the type desired by the
* parameter and give an error if we can't.
*/
- switch (p->type) {
- case fs_param_is_flag:
- if (param->type != fs_value_is_flag &&
- (param->type != fs_value_is_string || result->has_value))
- return invalf(fc, "%s: Unexpected value for '%s'",
- desc->name, param->key);
- result->boolean = true;
- goto okay;
-
- case fs_param_is_bool:
- switch (param->type) {
- case fs_value_is_flag:
- result->boolean = true;
- goto okay;
- case fs_value_is_string:
- if (param->size == 0) {
- result->boolean = true;
- goto okay;
- }
- b = lookup_constant(bool_names, param->string, -1);
- if (b == -1)
- goto bad_value;
- result->boolean = b;
- goto okay;
- default:
- goto bad_value;
- }
-
- case fs_param_is_u32:
- ret = kstrtouint(param->string, 0, &result->uint_32);
- goto maybe_okay;
- case fs_param_is_u32_octal:
- ret = kstrtouint(param->string, 8, &result->uint_32);
- goto maybe_okay;
- case fs_param_is_u32_hex:
- ret = kstrtouint(param->string, 16, &result->uint_32);
- goto maybe_okay;
- case fs_param_is_s32:
- ret = kstrtoint(param->string, 0, &result->int_32);
- goto maybe_okay;
- case fs_param_is_u64:
- ret = kstrtoull(param->string, 0, &result->uint_64);
- goto maybe_okay;
-
- case fs_param_is_enum:
- for (e = desc->enums; e->name[0]; e++) {
- if (e->opt == p->opt &&
- strcmp(e->name, param->string) == 0) {
- result->uint_32 = e->value;
- goto okay;
- }
- }
- goto bad_value;
-
- case fs_param_is_string:
- goto okay;
- case fs_param_is_blob:
- if (param->type != fs_value_is_blob)
- goto bad_value;
- goto okay;
-
- case fs_param_is_fd: {
- switch (param->type) {
- case fs_value_is_string:
- if (!result->has_value)
- goto bad_value;
-
- ret = kstrtouint(param->string, 0, &result->uint_32);
- break;
- case fs_value_is_file:
- result->uint_32 = param->dirfd;
- ret = 0;
- default:
- goto bad_value;
- }
-
- if (result->uint_32 > INT_MAX)
- goto bad_value;
- goto maybe_okay;
- }
-
- case fs_param_is_blockdev:
- case fs_param_is_path:
- goto okay;
- default:
- BUG();
+ if (is_flag(p)) {
+ if (param->type != fs_value_is_flag)
+ return inval_plog(log, "Unexpected value for '%s'",
+ param->key);
+ result->boolean = !result->negated;
+ } else {
+ int ret = p->type(log, p, param, result);
+ if (ret)
+ return ret;
}
-
-maybe_okay:
- if (ret < 0)
- goto bad_value;
-okay:
return p->opt;
-
-bad_value:
- return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key);
-unknown_parameter:
- return -ENOPARAM;
}
-EXPORT_SYMBOL(fs_parse);
+EXPORT_SYMBOL(__fs_parse);
/**
* fs_lookup_param - Look up a path referred to by a parameter
@@ -267,9 +157,6 @@ int fs_lookup_param(struct fs_context *fc,
return PTR_ERR(f);
put_f = true;
break;
- case fs_value_is_filename_empty:
- flags = LOOKUP_EMPTY;
- /* Fall through */
case fs_value_is_filename:
f = param->name;
put_f = false;
@@ -302,6 +189,124 @@ out:
}
EXPORT_SYMBOL(fs_lookup_param);
+int fs_param_bad_value(struct p_log *log, struct fs_parameter *param)
+{
+ return inval_plog(log, "Bad value for '%s'", param->key);
+}
+
+int fs_param_is_bool(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ int b;
+ if (param->type != fs_value_is_string)
+ return fs_param_bad_value(log, param);
+ b = lookup_constant(bool_names, param->string, -1);
+ if (b == -1)
+ return fs_param_bad_value(log, param);
+ result->boolean = b;
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_bool);
+
+int fs_param_is_u32(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ int base = (unsigned long)p->data;
+ if (param->type != fs_value_is_string ||
+ kstrtouint(param->string, base, &result->uint_32) < 0)
+ return fs_param_bad_value(log, param);
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_u32);
+
+int fs_param_is_s32(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ if (param->type != fs_value_is_string ||
+ kstrtoint(param->string, 0, &result->int_32) < 0)
+ return fs_param_bad_value(log, param);
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_s32);
+
+int fs_param_is_u64(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ if (param->type != fs_value_is_string ||
+ kstrtoull(param->string, 0, &result->uint_64) < 0)
+ return fs_param_bad_value(log, param);
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_u64);
+
+int fs_param_is_enum(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ const struct constant_table *c;
+ if (param->type != fs_value_is_string)
+ return fs_param_bad_value(log, param);
+ c = __lookup_constant(p->data, param->string);
+ if (!c)
+ return fs_param_bad_value(log, param);
+ result->uint_32 = c->value;
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_enum);
+
+int fs_param_is_string(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ if (param->type != fs_value_is_string || !*param->string)
+ return fs_param_bad_value(log, param);
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_string);
+
+int fs_param_is_blob(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ if (param->type != fs_value_is_blob)
+ return fs_param_bad_value(log, param);
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_blob);
+
+int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ switch (param->type) {
+ case fs_value_is_string:
+ if (kstrtouint(param->string, 0, &result->uint_32) < 0)
+ break;
+ if (result->uint_32 <= INT_MAX)
+ return 0;
+ break;
+ case fs_value_is_file:
+ result->uint_32 = param->dirfd;
+ if (result->uint_32 <= INT_MAX)
+ return 0;
+ break;
+ default:
+ break;
+ }
+ return fs_param_bad_value(log, param);
+}
+EXPORT_SYMBOL(fs_param_is_fd);
+
+int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_blockdev);
+
+int fs_param_is_path(struct p_log *log, const struct fs_parameter_spec *p,
+ struct fs_parameter *param, struct fs_parse_result *result)
+{
+ return 0;
+}
+EXPORT_SYMBOL(fs_param_is_path);
+
#ifdef CONFIG_VALIDATE_FS_PARSER
/**
* validate_constant_table - Validate a constant table
@@ -357,102 +362,26 @@ bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
* fs_validate_description - Validate a parameter description
* @desc: The parameter description to validate.
*/
-bool fs_validate_description(const struct fs_parameter_description *desc)
+bool fs_validate_description(const char *name,
+ const struct fs_parameter_spec *desc)
{
const struct fs_parameter_spec *param, *p2;
- const struct fs_parameter_enum *e;
- const char *name = desc->name;
- unsigned int nr_params = 0;
- bool good = true, enums = false;
+ bool good = true;
pr_notice("*** VALIDATE %s ***\n", name);
- if (!name[0]) {
- pr_err("VALIDATE Parser: No name\n");
- name = "Unknown";
- good = false;
- }
-
- if (desc->specs) {
- for (param = desc->specs; param->name; param++) {
- enum fs_parameter_type t = param->type;
-
- /* Check that the type is in range */
- if (t == __fs_param_wasnt_defined ||
- t >= nr__fs_parameter_type) {
- pr_err("VALIDATE %s: PARAM[%s] Bad type %u\n",
- name, param->name, t);
- good = false;
- } else if (t == fs_param_is_enum) {
- enums = true;
- }
-
- /* Check for duplicate parameter names */
- for (p2 = desc->specs; p2 < param; p2++) {
- if (strcmp(param->name, p2->name) == 0) {
- pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n",
- name, param->name);
- good = false;
- }
- }
- }
-
- nr_params = param - desc->specs;
- }
-
- if (desc->enums) {
- if (!nr_params) {
- pr_err("VALIDATE %s: Enum table but no parameters\n",
- name);
- good = false;
- goto no_enums;
- }
- if (!enums) {
- pr_err("VALIDATE %s: Enum table but no enum-type values\n",
- name);
- good = false;
- goto no_enums;
- }
-
- for (e = desc->enums; e->name[0]; e++) {
- /* Check that all entries in the enum table have at
- * least one parameter that uses them.
- */
- for (param = desc->specs; param->name; param++) {
- if (param->opt == e->opt &&
- param->type != fs_param_is_enum) {
- pr_err("VALIDATE %s: e[%tu] enum val for %s\n",
- name, e - desc->enums, param->name);
- good = false;
- }
- }
- }
-
- /* Check that all enum-type parameters have at least one enum
- * value in the enum table.
- */
- for (param = desc->specs; param->name; param++) {
- if (param->type != fs_param_is_enum)
- continue;
- for (e = desc->enums; e->name[0]; e++)
- if (e->opt == param->opt)
- break;
- if (!e->name[0]) {
- pr_err("VALIDATE %s: PARAM[%s] enum with no values\n",
+ for (param = desc; param->name; param++) {
+ /* Check for duplicate parameter names */
+ for (p2 = desc; p2 < param; p2++) {
+ if (strcmp(param->name, p2->name) == 0) {
+ if (is_flag(param) != is_flag(p2))
+ continue;
+ pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n",
name, param->name);
good = false;
}
}
- } else {
- if (enums) {
- pr_err("VALIDATE %s: enum-type values, but no enum table\n",
- name);
- good = false;
- goto no_enums;
- }
}
-
-no_enums:
return good;
}
#endif /* CONFIG_VALIDATE_FS_PARSER */
diff --git a/fs/fsopen.c b/fs/fsopen.c
index 043ffa8dc263..2fa3f241b762 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -25,7 +25,7 @@ static ssize_t fscontext_read(struct file *file,
char __user *_buf, size_t len, loff_t *pos)
{
struct fs_context *fc = file->private_data;
- struct fc_log *log = fc->log;
+ struct fc_log *log = fc->log.log;
unsigned int logsize = ARRAY_SIZE(log->buffer);
ssize_t ret;
char *p;
@@ -97,11 +97,11 @@ static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
static int fscontext_alloc_log(struct fs_context *fc)
{
- fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL);
- if (!fc->log)
+ fc->log.log = kzalloc(sizeof(*fc->log.log), GFP_KERNEL);
+ if (!fc->log.log)
return -ENOMEM;
- refcount_set(&fc->log->usage, 1);
- fc->log->owner = fc->fs_type->owner;
+ refcount_set(&fc->log.log->usage, 1);
+ fc->log.log->owner = fc->fs_type->owner;
return 0;
}
@@ -321,6 +321,7 @@ SYSCALL_DEFINE5(fsconfig,
struct fs_context *fc;
struct fd f;
int ret;
+ int lookup_flags = 0;
struct fs_parameter param = {
.type = fs_value_is_undefined,
@@ -409,19 +410,12 @@ SYSCALL_DEFINE5(fsconfig,
goto out_key;
}
break;
+ case FSCONFIG_SET_PATH_EMPTY:
+ lookup_flags = LOOKUP_EMPTY;
+ /* fallthru */
case FSCONFIG_SET_PATH:
param.type = fs_value_is_filename;
- param.name = getname_flags(_value, 0, NULL);
- if (IS_ERR(param.name)) {
- ret = PTR_ERR(param.name);
- goto out_key;
- }
- param.dirfd = aux;
- param.size = strlen(param.name->name);
- break;
- case FSCONFIG_SET_PATH_EMPTY:
- param.type = fs_value_is_filename_empty;
- param.name = getname_flags(_value, LOOKUP_EMPTY, NULL);
+ param.name = getname_flags(_value, lookup_flags, NULL);
if (IS_ERR(param.name)) {
ret = PTR_ERR(param.name);
goto out_key;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 00015d851382..030f094910c3 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -451,8 +451,8 @@ static int cuse_send_init(struct cuse_conn *cc)
ap->args.out_args[0].size = sizeof(ia->out);
ap->args.out_args[0].value = &ia->out;
ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
- ap->args.out_argvar = 1;
- ap->args.out_pages = 1;
+ ap->args.out_argvar = true;
+ ap->args.out_pages = true;
ap->num_pages = 1;
ap->pages = &ia->page;
ap->descs = &ia->desc;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ee190119f45c..de1e2fde60bd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -818,7 +818,7 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
struct fuse_conn *fc = get_fuse_conn(olddir);
int err;
- if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
if (flags) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ce715380143c..9d67b830fb7a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -803,6 +803,10 @@ static int fuse_do_readpage(struct file *file, struct page *page)
attr_ver = fuse_get_attr_version(fc);
+ /* Don't overflow end offset */
+ if (pos + (desc.length - 1) == LLONG_MAX)
+ desc.length--;
+
fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ);
res = fuse_simple_request(fc, &ia.ap.args);
if (res < 0)
@@ -888,6 +892,14 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
ap->args.out_pages = true;
ap->args.page_zeroing = true;
ap->args.page_replace = true;
+
+ /* Don't overflow end offset */
+ if (pos + (count - 1) == LLONG_MAX) {
+ count--;
+ ap->descs[ap->num_pages - 1].length--;
+ }
+ WARN_ON((loff_t) (pos + count) < 0);
+
fuse_read_args_fill(ia, file, pos, count, FUSE_READ);
ia->read.attr_ver = fuse_get_attr_version(fc);
if (fc->async_read) {
@@ -1397,9 +1409,9 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
}
if (write)
- ap->args.in_pages = 1;
+ ap->args.in_pages = true;
else
- ap->args.out_pages = 1;
+ ap->args.out_pages = true;
*nbytesp = nbytes;
@@ -1465,6 +1477,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
}
ia = NULL;
if (nres < 0) {
+ iov_iter_revert(iter, nbytes);
err = nres;
break;
}
@@ -1473,8 +1486,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
count -= nres;
res += nres;
pos += nres;
- if (nres != nbytes)
+ if (nres != nbytes) {
+ iov_iter_revert(iter, nbytes - nres);
break;
+ }
if (count) {
max_pages = iov_iter_npages(iter, fc->max_pages);
ia = fuse_io_alloc(io, max_pages);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 16aec32f7f3d..95d712d44ca1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -448,7 +448,7 @@ enum {
OPT_ERR
};
-static const struct fs_parameter_spec fuse_param_specs[] = {
+static const struct fs_parameter_spec fuse_fs_parameters[] = {
fsparam_string ("source", OPT_SOURCE),
fsparam_u32 ("fd", OPT_FD),
fsparam_u32oct ("rootmode", OPT_ROOTMODE),
@@ -462,68 +462,63 @@ static const struct fs_parameter_spec fuse_param_specs[] = {
{}
};
-static const struct fs_parameter_description fuse_fs_parameters = {
- .name = "fuse",
- .specs = fuse_param_specs,
-};
-
static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct fs_parse_result result;
struct fuse_fs_context *ctx = fc->fs_private;
int opt;
- opt = fs_parse(fc, &fuse_fs_parameters, param, &result);
+ opt = fs_parse(fc, fuse_fs_parameters, param, &result);
if (opt < 0)
return opt;
switch (opt) {
case OPT_SOURCE:
if (fc->source)
- return invalf(fc, "fuse: Multiple sources specified");
+ return invalfc(fc, "Multiple sources specified");
fc->source = param->string;
param->string = NULL;
break;
case OPT_SUBTYPE:
if (ctx->subtype)
- return invalf(fc, "fuse: Multiple subtypes specified");
+ return invalfc(fc, "Multiple subtypes specified");
ctx->subtype = param->string;
param->string = NULL;
return 0;
case OPT_FD:
ctx->fd = result.uint_32;
- ctx->fd_present = 1;
+ ctx->fd_present = true;
break;
case OPT_ROOTMODE:
if (!fuse_valid_type(result.uint_32))
- return invalf(fc, "fuse: Invalid rootmode");
+ return invalfc(fc, "Invalid rootmode");
ctx->rootmode = result.uint_32;
- ctx->rootmode_present = 1;
+ ctx->rootmode_present = true;
break;
case OPT_USER_ID:
ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
if (!uid_valid(ctx->user_id))
- return invalf(fc, "fuse: Invalid user_id");
- ctx->user_id_present = 1;
+ return invalfc(fc, "Invalid user_id");
+ ctx->user_id_present = true;
break;
case OPT_GROUP_ID:
ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
if (!gid_valid(ctx->group_id))
- return invalf(fc, "fuse: Invalid group_id");
- ctx->group_id_present = 1;
+ return invalfc(fc, "Invalid group_id");
+ ctx->group_id_present = true;
break;
case OPT_DEFAULT_PERMISSIONS:
- ctx->default_permissions = 1;
+ ctx->default_permissions = true;
break;
case OPT_ALLOW_OTHER:
- ctx->allow_other = 1;
+ ctx->allow_other = true;
break;
case OPT_MAX_READ:
@@ -532,7 +527,7 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
case OPT_BLKSIZE:
if (!ctx->is_bdev)
- return invalf(fc, "fuse: blksize only supported for fuseblk");
+ return invalfc(fc, "blksize only supported for fuseblk");
ctx->blksize = result.uint_32;
break;
@@ -997,7 +992,7 @@ void fuse_send_init(struct fuse_conn *fc)
/* Variable length argument used for backward compatibility
with interface version < 7.5. Rest of init_out is zeroed
by do_get_request(), so a short reply is not a problem */
- ia->args.out_argvar = 1;
+ ia->args.out_argvar = true;
ia->args.out_args[0].size = sizeof(ia->out);
ia->args.out_args[0].value = &ia->out;
ia->args.force = true;
@@ -1347,7 +1342,7 @@ static struct file_system_type fuse_fs_type = {
.name = "fuse",
.fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
.init_fs_context = fuse_init_fs_context,
- .parameters = &fuse_fs_parameters,
+ .parameters = fuse_fs_parameters,
.kill_sb = fuse_kill_sb_anon,
};
MODULE_ALIAS_FS("fuse");
@@ -1363,7 +1358,7 @@ static struct file_system_type fuseblk_fs_type = {
.owner = THIS_MODULE,
.name = "fuseblk",
.init_fs_context = fuse_init_fs_context,
- .parameters = &fuse_fs_parameters,
+ .parameters = fuse_fs_parameters,
.kill_sb = fuse_kill_sb_blk,
.fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 6a40f75a0d25..90e3f01bd796 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -332,7 +332,7 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
return -ENOMEM;
plus = fuse_use_readdirplus(inode, ctx);
- ap->args.out_pages = 1;
+ ap->args.out_pages = true;
ap->num_pages = 1;
ap->pages = &page;
ap->descs = &desc;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9d58295ccf7a..cb26be6f4351 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -847,7 +847,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct gfs2_inode *ip = GFS2_I(inode);
- ssize_t written = 0, ret;
+ ssize_t ret;
ret = gfs2_rsqa_alloc(ip);
if (ret)
@@ -867,68 +867,58 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret <= 0)
- goto out;
-
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
+ goto out_unlock;
ret = file_remove_privs(file);
if (ret)
- goto out2;
+ goto out_unlock;
ret = file_update_time(file);
if (ret)
- goto out2;
+ goto out_unlock;
if (iocb->ki_flags & IOCB_DIRECT) {
struct address_space *mapping = file->f_mapping;
- loff_t pos, endbyte;
- ssize_t buffered;
+ ssize_t buffered, ret2;
- written = gfs2_file_direct_write(iocb, from);
- if (written < 0 || !iov_iter_count(from))
- goto out2;
+ ret = gfs2_file_direct_write(iocb, from);
+ if (ret < 0 || !iov_iter_count(from))
+ goto out_unlock;
- ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
- if (unlikely(ret < 0))
- goto out2;
- buffered = ret;
+ iocb->ki_flags |= IOCB_DSYNC;
+ current->backing_dev_info = inode_to_bdi(inode);
+ buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+ current->backing_dev_info = NULL;
+ if (unlikely(buffered <= 0))
+ goto out_unlock;
/*
* We need to ensure that the page cache pages are written to
* disk and invalidated to preserve the expected O_DIRECT
- * semantics.
+ * semantics. If the writeback or invalidate fails, only report
+ * the direct I/O range as we don't know if the buffered pages
+ * made it to disk.
*/
- pos = iocb->ki_pos;
- endbyte = pos + buffered - 1;
- ret = filemap_write_and_wait_range(mapping, pos, endbyte);
- if (!ret) {
- iocb->ki_pos += buffered;
- written += buffered;
- invalidate_mapping_pages(mapping,
- pos >> PAGE_SHIFT,
- endbyte >> PAGE_SHIFT);
- } else {
- /*
- * We don't know how much we wrote, so just return
- * the number of bytes which were direct-written
- */
- }
+ iocb->ki_pos += buffered;
+ ret2 = generic_write_sync(iocb, buffered);
+ invalidate_mapping_pages(mapping,
+ (iocb->ki_pos - buffered) >> PAGE_SHIFT,
+ (iocb->ki_pos - 1) >> PAGE_SHIFT);
+ if (!ret || ret2 > 0)
+ ret += ret2;
} else {
+ current->backing_dev_info = inode_to_bdi(inode);
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
- if (likely(ret > 0))
+ current->backing_dev_info = NULL;
+ if (likely(ret > 0)) {
iocb->ki_pos += ret;
+ ret = generic_write_sync(iocb, ret);
+ }
}
-out2:
- current->backing_dev_info = NULL;
-out:
+out_unlock:
inode_unlock(inode);
- if (likely(ret > 0)) {
- /* Handle various SYNC-type writes */
- ret = generic_write_sync(iocb, ret);
- }
- return written ? written : ret;
+ return ret;
}
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index d9431724b788..c090d5ad3f22 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -422,7 +422,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd,
for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) {
if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) {
- if (lh.lh_sequence > head->lh_sequence)
+ if (lh.lh_sequence >= head->lh_sequence)
*head = lh;
else {
ret = true;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b3e904bcc02c..a1a8ef7ed3fd 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1252,6 +1252,7 @@ enum gfs2_param {
Opt_upgrade,
Opt_acl,
Opt_quota,
+ Opt_quota_flag,
Opt_suiddir,
Opt_data,
Opt_meta,
@@ -1266,17 +1267,11 @@ enum gfs2_param {
Opt_loccookie,
};
-enum opt_quota {
- Opt_quota_unset = 0,
- Opt_quota_off,
- Opt_quota_account,
- Opt_quota_on,
-};
-
-static const unsigned int opt_quota_values[] = {
- [Opt_quota_off] = GFS2_QUOTA_OFF,
- [Opt_quota_account] = GFS2_QUOTA_ACCOUNT,
- [Opt_quota_on] = GFS2_QUOTA_ON,
+static const struct constant_table gfs2_param_quota[] = {
+ {"off", GFS2_QUOTA_OFF},
+ {"account", GFS2_QUOTA_ACCOUNT},
+ {"on", GFS2_QUOTA_ON},
+ {}
};
enum opt_data {
@@ -1284,12 +1279,24 @@ enum opt_data {
Opt_data_ordered = GFS2_DATA_ORDERED,
};
+static const struct constant_table gfs2_param_data[] = {
+ {"writeback", Opt_data_writeback },
+ {"ordered", Opt_data_ordered },
+ {}
+};
+
enum opt_errors {
Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW,
Opt_errors_panic = GFS2_ERRORS_PANIC,
};
-static const struct fs_parameter_spec gfs2_param_specs[] = {
+static const struct constant_table gfs2_param_errors[] = {
+ {"withdraw", Opt_errors_withdraw },
+ {"panic", Opt_errors_panic },
+ {}
+};
+
+static const struct fs_parameter_spec gfs2_fs_parameters[] = {
fsparam_string ("lockproto", Opt_lockproto),
fsparam_string ("locktable", Opt_locktable),
fsparam_string ("hostdata", Opt_hostdata),
@@ -1302,11 +1309,11 @@ static const struct fs_parameter_spec gfs2_param_specs[] = {
fsparam_flag ("upgrade", Opt_upgrade),
fsparam_flag_no("acl", Opt_acl),
fsparam_flag_no("suiddir", Opt_suiddir),
- fsparam_enum ("data", Opt_data),
+ fsparam_enum ("data", Opt_data, gfs2_param_data),
fsparam_flag ("meta", Opt_meta),
fsparam_flag_no("discard", Opt_discard),
fsparam_s32 ("commit", Opt_commit),
- fsparam_enum ("errors", Opt_errors),
+ fsparam_enum ("errors", Opt_errors, gfs2_param_errors),
fsparam_s32 ("statfs_quantum", Opt_statfs_quantum),
fsparam_s32 ("statfs_percent", Opt_statfs_percent),
fsparam_s32 ("quota_quantum", Opt_quota_quantum),
@@ -1314,27 +1321,11 @@ static const struct fs_parameter_spec gfs2_param_specs[] = {
fsparam_flag_no("rgrplvb", Opt_rgrplvb),
fsparam_flag_no("loccookie", Opt_loccookie),
/* quota can be a flag or an enum so it gets special treatment */
- __fsparam(fs_param_is_enum, "quota", Opt_quota, fs_param_neg_with_no|fs_param_v_optional),
- {}
-};
-
-static const struct fs_parameter_enum gfs2_param_enums[] = {
- { Opt_quota, "off", Opt_quota_off },
- { Opt_quota, "account", Opt_quota_account },
- { Opt_quota, "on", Opt_quota_on },
- { Opt_data, "writeback", Opt_data_writeback },
- { Opt_data, "ordered", Opt_data_ordered },
- { Opt_errors, "withdraw", Opt_errors_withdraw },
- { Opt_errors, "panic", Opt_errors_panic },
+ fsparam_flag_no("quota", Opt_quota_flag),
+ fsparam_enum("quota", Opt_quota, gfs2_param_quota),
{}
};
-static const struct fs_parameter_description gfs2_fs_parameters = {
- .name = "gfs2",
- .specs = gfs2_param_specs,
- .enums = gfs2_param_enums,
-};
-
/* Parse a single mount parameter */
static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
@@ -1342,7 +1333,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct fs_parse_result result;
int o;
- o = fs_parse(fc, &gfs2_fs_parameters, param, &result);
+ o = fs_parse(fc, gfs2_fs_parameters, param, &result);
if (o < 0)
return o;
@@ -1370,7 +1361,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_debug:
if (result.boolean && args->ar_errors == GFS2_ERRORS_PANIC)
- return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+ return invalfc(fc, "-o debug and -o errors=panic are mutually exclusive");
args->ar_debug = result.boolean;
break;
case Opt_upgrade:
@@ -1379,17 +1370,11 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_acl:
args->ar_posix_acl = result.boolean;
break;
+ case Opt_quota_flag:
+ args->ar_quota = result.negated ? GFS2_QUOTA_OFF : GFS2_QUOTA_ON;
+ break;
case Opt_quota:
- /* The quota option can be a flag or an enum. A non-zero int_32
- result means that we have an enum index. Otherwise we have
- to rely on the 'negated' flag to tell us whether 'quota' or
- 'noquota' was specified. */
- if (result.negated)
- args->ar_quota = GFS2_QUOTA_OFF;
- else if (result.int_32 > 0)
- args->ar_quota = opt_quota_values[result.int_32];
- else
- args->ar_quota = GFS2_QUOTA_ON;
+ args->ar_quota = result.int_32;
break;
case Opt_suiddir:
args->ar_suiddir = result.boolean;
@@ -1406,27 +1391,27 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_commit:
if (result.int_32 <= 0)
- return invalf(fc, "gfs2: commit mount option requires a positive numeric argument");
+ return invalfc(fc, "commit mount option requires a positive numeric argument");
args->ar_commit = result.int_32;
break;
case Opt_statfs_quantum:
if (result.int_32 < 0)
- return invalf(fc, "gfs2: statfs_quantum mount option requires a non-negative numeric argument");
+ return invalfc(fc, "statfs_quantum mount option requires a non-negative numeric argument");
args->ar_statfs_quantum = result.int_32;
break;
case Opt_quota_quantum:
if (result.int_32 <= 0)
- return invalf(fc, "gfs2: quota_quantum mount option requires a positive numeric argument");
+ return invalfc(fc, "quota_quantum mount option requires a positive numeric argument");
args->ar_quota_quantum = result.int_32;
break;
case Opt_statfs_percent:
if (result.int_32 < 0 || result.int_32 > 100)
- return invalf(fc, "gfs2: statfs_percent mount option requires a numeric argument between 0 and 100");
+ return invalfc(fc, "statfs_percent mount option requires a numeric argument between 0 and 100");
args->ar_statfs_percent = result.int_32;
break;
case Opt_errors:
if (args->ar_debug && result.uint_32 == GFS2_ERRORS_PANIC)
- return invalf(fc, "gfs2: -o debug and -o errors=panic are mutually exclusive");
+ return invalfc(fc, "-o debug and -o errors=panic are mutually exclusive");
args->ar_errors = result.uint_32;
break;
case Opt_barrier:
@@ -1439,7 +1424,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
args->ar_loccookie = result.boolean;
break;
default:
- return invalf(fc, "gfs2: invalid mount option: %s", param->key);
+ return invalfc(fc, "invalid mount option: %s", param->key);
}
return 0;
}
@@ -1465,27 +1450,27 @@ static int gfs2_reconfigure(struct fs_context *fc)
spin_unlock(&gt->gt_spin);
if (strcmp(newargs->ar_lockproto, oldargs->ar_lockproto)) {
- errorf(fc, "gfs2: reconfiguration of locking protocol not allowed");
+ errorfc(fc, "reconfiguration of locking protocol not allowed");
return -EINVAL;
}
if (strcmp(newargs->ar_locktable, oldargs->ar_locktable)) {
- errorf(fc, "gfs2: reconfiguration of lock table not allowed");
+ errorfc(fc, "reconfiguration of lock table not allowed");
return -EINVAL;
}
if (strcmp(newargs->ar_hostdata, oldargs->ar_hostdata)) {
- errorf(fc, "gfs2: reconfiguration of host data not allowed");
+ errorfc(fc, "reconfiguration of host data not allowed");
return -EINVAL;
}
if (newargs->ar_spectator != oldargs->ar_spectator) {
- errorf(fc, "gfs2: reconfiguration of spectator mode not allowed");
+ errorfc(fc, "reconfiguration of spectator mode not allowed");
return -EINVAL;
}
if (newargs->ar_localflocks != oldargs->ar_localflocks) {
- errorf(fc, "gfs2: reconfiguration of localflocks not allowed");
+ errorfc(fc, "reconfiguration of localflocks not allowed");
return -EINVAL;
}
if (newargs->ar_meta != oldargs->ar_meta) {
- errorf(fc, "gfs2: switching between gfs2 and gfs2meta not allowed");
+ errorfc(fc, "switching between gfs2 and gfs2meta not allowed");
return -EINVAL;
}
if (oldargs->ar_spectator)
@@ -1495,11 +1480,11 @@ static int gfs2_reconfigure(struct fs_context *fc)
if (fc->sb_flags & SB_RDONLY) {
error = gfs2_make_fs_ro(sdp);
if (error)
- errorf(fc, "gfs2: unable to remount read-only");
+ errorfc(fc, "unable to remount read-only");
} else {
error = gfs2_make_fs_rw(sdp);
if (error)
- errorf(fc, "gfs2: unable to remount read-write");
+ errorfc(fc, "unable to remount read-write");
}
}
sdp->sd_args = *newargs;
@@ -1644,7 +1629,7 @@ struct file_system_type gfs2_fs_type = {
.name = "gfs2",
.fs_flags = FS_REQUIRES_DEV,
.init_fs_context = gfs2_init_fs_context,
- .parameters = &gfs2_fs_parameters,
+ .parameters = gfs2_fs_parameters,
.kill_sb = gfs2_kill_sb,
.owner = THIS_MODULE,
};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a66e425884d1..aff8642f0c2e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -73,7 +73,7 @@ enum hugetlb_param {
Opt_uid,
};
-static const struct fs_parameter_spec hugetlb_param_specs[] = {
+static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
fsparam_u32 ("gid", Opt_gid),
fsparam_string("min_size", Opt_min_size),
fsparam_u32 ("mode", Opt_mode),
@@ -84,11 +84,6 @@ static const struct fs_parameter_spec hugetlb_param_specs[] = {
{}
};
-static const struct fs_parameter_description hugetlb_fs_parameters = {
- .name = "hugetlbfs",
- .specs = hugetlb_param_specs,
-};
-
#ifdef CONFIG_NUMA
static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
struct inode *inode, pgoff_t index)
@@ -1171,7 +1166,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
unsigned long ps;
int opt;
- opt = fs_parse(fc, &hugetlb_fs_parameters, param, &result);
+ opt = fs_parse(fc, hugetlb_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -1233,7 +1228,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
}
bad_val:
- return invalf(fc, "hugetlbfs: Bad value '%s' for mount option '%s'\n",
+ return invalfc(fc, "Bad value '%s' for mount option '%s'\n",
param->string, param->key);
}
@@ -1358,7 +1353,7 @@ static int hugetlbfs_init_fs_context(struct fs_context *fc)
static struct file_system_type hugetlbfs_fs_type = {
.name = "hugetlbfs",
.init_fs_context = hugetlbfs_init_fs_context,
- .parameters = &hugetlb_fs_parameters,
+ .parameters = hugetlb_fs_parameters,
.kill_sb = kill_litter_super,
};
diff --git a/fs/inode.c b/fs/inode.c
index c7418b0b4168..7d57068b6b7a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1599,25 +1599,31 @@ retry:
}
EXPORT_SYMBOL(iput);
+#ifdef CONFIG_BLOCK
/**
* bmap - find a block number in a file
- * @inode: inode of file
- * @block: block to find
- *
- * Returns the block number on the device holding the inode that
- * is the disk block number for the block of the file requested.
- * That is, asked for block 4 of inode 1 the function will return the
- * disk block relative to the disk start that holds that block of the
- * file.
+ * @inode: inode owning the block number being requested
+ * @block: pointer containing the block to find
+ *
+ * Replaces the value in *block with the block number on the device holding
+ * corresponding to the requested block number in the file.
+ * That is, asked for block 4 of inode 1 the function will replace the
+ * 4 in *block, with disk block relative to the disk start that holds that
+ * block of the file.
+ *
+ * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
+ * hole, returns 0 and *block is also set to 0.
*/
-sector_t bmap(struct inode *inode, sector_t block)
+int bmap(struct inode *inode, sector_t *block)
{
- sector_t res = 0;
- if (inode->i_mapping->a_ops->bmap)
- res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
- return res;
+ if (!inode->i_mapping->a_ops->bmap)
+ return -EINVAL;
+
+ *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
+ return 0;
}
EXPORT_SYMBOL(bmap);
+#endif
/*
* With relative atime, only update atime if the previous atime is
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 7c9a5df5a597..282d45be6f45 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -54,19 +54,32 @@ EXPORT_SYMBOL(vfs_ioctl);
static int ioctl_fibmap(struct file *filp, int __user *p)
{
- struct address_space *mapping = filp->f_mapping;
- int res, block;
+ struct inode *inode = file_inode(filp);
+ int error, ur_block;
+ sector_t block;
- /* do we support this mess? */
- if (!mapping->a_ops->bmap)
- return -EINVAL;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
- res = get_user(block, p);
- if (res)
- return res;
- res = mapping->a_ops->bmap(mapping, block);
- return put_user(res, p);
+
+ error = get_user(ur_block, p);
+ if (error)
+ return error;
+
+ if (ur_block < 0)
+ return -EINVAL;
+
+ block = ur_block;
+ error = bmap(inode, &block);
+
+ if (error)
+ ur_block = 0;
+ else
+ ur_block = block;
+
+ if (put_user(ur_block, p))
+ error = -EFAULT;
+
+ return error;
}
/**
@@ -523,13 +536,9 @@ static int compat_ioctl_preallocate(struct file *file, int mode,
static int file_ioctl(struct file *filp, unsigned int cmd, int __user *p)
{
- struct inode *inode = file_inode(filp);
-
switch (cmd) {
case FIBMAP:
return ioctl_fibmap(filp, p);
- case FIONREAD:
- return put_user(i_size_read(inode) - filp->f_pos, p);
case FS_IOC_RESVSP:
case FS_IOC_RESVSP64:
return ioctl_preallocate(filp, 0, p);
@@ -721,6 +730,13 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
case FIDEDUPERANGE:
return ioctl_file_dedupe_range(filp, argp);
+ case FIONREAD:
+ if (!S_ISREG(inode->i_mode))
+ return vfs_ioctl(filp, cmd, arg);
+
+ return put_user(i_size_read(inode) - filp->f_pos,
+ (int __user *)argp);
+
default:
if (S_ISREG(inode->i_mode))
return file_ioctl(filp, cmd, argp);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb8ca446d1ab..a49d0e670ddf 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -794,18 +794,22 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
{
int err = 0;
unsigned long long ret;
+ sector_t block = 0;
if (journal->j_inode) {
- ret = bmap(journal->j_inode, blocknr);
- if (ret)
- *retp = ret;
- else {
+ block = blocknr;
+ ret = bmap(journal->j_inode, &block);
+
+ if (ret || !block) {
printk(KERN_ALERT "%s: journal block not found "
"at offset %lu on %s\n",
__func__, blocknr, journal->j_devname);
err = -EIO;
jbd2_journal_abort(journal, err);
+ } else {
+ *retp = block;
}
+
} else {
*retp = blocknr; /* +journal->j_blk_offset */
}
@@ -1243,11 +1247,14 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev,
journal_t *jbd2_journal_init_inode(struct inode *inode)
{
journal_t *journal;
+ sector_t blocknr;
char *p;
- unsigned long long blocknr;
+ int err = 0;
+
+ blocknr = 0;
+ err = bmap(inode, &blocknr);
- blocknr = bmap(inode, 0);
- if (!blocknr) {
+ if (err || !blocknr) {
pr_err("%s: Cannot locate journal superblock\n",
__func__);
return NULL;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 0e6406c4f362..05d7878dfad1 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -167,27 +167,21 @@ enum {
Opt_rp_size,
};
-static const struct fs_parameter_spec jffs2_param_specs[] = {
- fsparam_enum ("compr", Opt_override_compr),
- fsparam_u32 ("rp_size", Opt_rp_size),
- {}
-};
-
-static const struct fs_parameter_enum jffs2_param_enums[] = {
- { Opt_override_compr, "none", JFFS2_COMPR_MODE_NONE },
+static const struct constant_table jffs2_param_compr[] = {
+ {"none", JFFS2_COMPR_MODE_NONE },
#ifdef CONFIG_JFFS2_LZO
- { Opt_override_compr, "lzo", JFFS2_COMPR_MODE_FORCELZO },
+ {"lzo", JFFS2_COMPR_MODE_FORCELZO },
#endif
#ifdef CONFIG_JFFS2_ZLIB
- { Opt_override_compr, "zlib", JFFS2_COMPR_MODE_FORCEZLIB },
+ {"zlib", JFFS2_COMPR_MODE_FORCEZLIB },
#endif
{}
};
-const struct fs_parameter_description jffs2_fs_parameters = {
- .name = "jffs2",
- .specs = jffs2_param_specs,
- .enums = jffs2_param_enums,
+static const struct fs_parameter_spec jffs2_fs_parameters[] = {
+ fsparam_enum ("compr", Opt_override_compr, jffs2_param_compr),
+ fsparam_u32 ("rp_size", Opt_rp_size),
+ {}
};
static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
@@ -196,7 +190,7 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct jffs2_sb_info *c = fc->s_fs_info;
int opt;
- opt = fs_parse(fc, &jffs2_fs_parameters, param, &result);
+ opt = fs_parse(fc, jffs2_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -339,7 +333,7 @@ static struct file_system_type jffs2_fs_type = {
.owner = THIS_MODULE,
.name = "jffs2",
.init_fs_context = jffs2_init_fs_context,
- .parameters = &jffs2_fs_parameters,
+ .parameters = jffs2_fs_parameters,
.kill_sb = jffs2_kill_sb,
};
MODULE_ALIAS_FS("jffs2");
diff --git a/fs/namespace.c b/fs/namespace.c
index 5e1bf611a9eb..85b5f7bea82e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2979,39 +2979,10 @@ static void shrink_submounts(struct mount *mnt)
}
}
-/*
- * Some copy_from_user() implementations do not return the exact number of
- * bytes remaining to copy on a fault. But copy_mount_options() requires that.
- * Note that this function differs from copy_from_user() in that it will oops
- * on bad values of `to', rather than returning a short copy.
- */
-static long exact_copy_from_user(void *to, const void __user * from,
- unsigned long n)
-{
- char *t = to;
- const char __user *f = from;
- char c;
-
- if (!access_ok(from, n))
- return n;
-
- while (n) {
- if (__get_user(c, f)) {
- memset(t, 0, n);
- break;
- }
- *t++ = c;
- f++;
- n--;
- }
- return n;
-}
-
void *copy_mount_options(const void __user * data)
{
- int i;
- unsigned long size;
char *copy;
+ unsigned size;
if (!data)
return NULL;
@@ -3020,22 +2991,16 @@ void *copy_mount_options(const void __user * data)
if (!copy)
return ERR_PTR(-ENOMEM);
- /* We only care that *some* data at the address the user
- * gave us is valid. Just in case, we'll zero
- * the remainder of the page.
- */
- /* copy_from_user cannot cross TASK_SIZE ! */
- size = TASK_SIZE - (unsigned long)untagged_addr(data);
- if (size > PAGE_SIZE)
- size = PAGE_SIZE;
+ size = PAGE_SIZE - offset_in_page(data);
- i = size - exact_copy_from_user(copy, data, size);
- if (!i) {
+ if (copy_from_user(copy, data, size)) {
kfree(copy);
return ERR_PTR(-EFAULT);
}
- if (i != PAGE_SIZE)
- memset(copy + i, 0, PAGE_SIZE - i);
+ if (size != PAGE_SIZE) {
+ if (copy_from_user(copy + size, data + size, PAGE_SIZE - size))
+ memset(copy + size, 0, PAGE_SIZE - size);
+ }
return copy;
}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 295a7a21b774..40b6c5ac46c0 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config NFS_V4
config NFS_SWAP
bool "Provide swap over NFS support"
default n
- depends on NFS_FS
+ depends on NFS_FS && SWAP
select SUNRPC_SWAP
help
This option enables swapon to work on files located on NFS mounts.
@@ -196,3 +196,12 @@ config NFS_DEBUG
depends on NFS_FS && SUNRPC_DEBUG
select CRC32
default y
+
+config NFS_DISABLE_UDP_SUPPORT
+ bool "NFS: Disable NFS UDP protocol support"
+ depends on NFS_FS
+ default y
+ help
+ Choose Y here to disable the use of NFS over UDP. NFS over UDP
+ on modern networks (1Gb+) can lead to data corruption caused by
+ fragmentation during high loads.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 34cdeaecccf6..2433c3e03cfa 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -9,7 +9,7 @@ CFLAGS_nfstrace.o += -I$(src)
nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
io.o direct.o pagelist.o read.o symlink.o unlink.o \
write.o namespace.o mount_clnt.o nfstrace.o \
- export.o sysfs.o
+ export.o sysfs.o fs_context.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 03a20f5716c7..79ff172eb1c8 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -18,6 +18,7 @@
#include "callback.h"
#include "internal.h"
#include "nfs4session.h"
+#include "nfs4trace.h"
#define CB_OP_TAGLEN_MAXSZ (512)
#define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status
@@ -946,9 +947,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
if (hdr_arg.minorversion == 0) {
cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
- if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) {
- if (cps.clp)
- nfs_put_client(cps.clp);
+ if (!cps.clp) {
+ trace_nfs_cb_no_clp(rqstp->rq_xid, hdr_arg.cb_ident);
+ goto out_invalidcred;
+ }
+ if (!check_gss_callback_principal(cps.clp, rqstp)) {
+ trace_nfs_cb_badprinc(rqstp->rq_xid, hdr_arg.cb_ident);
+ nfs_put_client(cps.clp);
goto out_invalidcred;
}
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 02110a30a49e..989c30c98511 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -474,6 +474,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
to->to_maxval = to->to_initval;
to->to_exponential = 0;
break;
+#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
case XPRT_TRANSPORT_UDP:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_UDP_RETRANS;
@@ -484,6 +485,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
to->to_maxval = NFS_MAX_UDP_TIMEOUT;
to->to_exponential = 1;
break;
+#endif
default:
BUG();
}
@@ -580,8 +582,10 @@ static int nfs_start_lockd(struct nfs_server *server)
default:
nlm_init.protocol = IPPROTO_TCP;
break;
+#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
case XPRT_TRANSPORT_UDP:
nlm_init.protocol = IPPROTO_UDP;
+#endif
}
host = nlmclnt_init(&nlm_init);
@@ -658,28 +662,28 @@ EXPORT_SYMBOL_GPL(nfs_init_client);
* Create a version 2 or 3 client
*/
static int nfs_init_server(struct nfs_server *server,
- const struct nfs_parsed_mount_data *data,
- struct nfs_subversion *nfs_mod)
+ const struct fs_context *fc)
{
+ const struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct rpc_timeout timeparms;
struct nfs_client_initdata cl_init = {
- .hostname = data->nfs_server.hostname,
- .addr = (const struct sockaddr *)&data->nfs_server.address,
- .addrlen = data->nfs_server.addrlen,
- .nfs_mod = nfs_mod,
- .proto = data->nfs_server.protocol,
- .net = data->net,
+ .hostname = ctx->nfs_server.hostname,
+ .addr = (const struct sockaddr *)&ctx->nfs_server.address,
+ .addrlen = ctx->nfs_server.addrlen,
+ .nfs_mod = ctx->nfs_mod,
+ .proto = ctx->nfs_server.protocol,
+ .net = fc->net_ns,
.timeparms = &timeparms,
.cred = server->cred,
- .nconnect = data->nfs_server.nconnect,
+ .nconnect = ctx->nfs_server.nconnect,
.init_flags = (1UL << NFS_CS_REUSEPORT),
};
struct nfs_client *clp;
int error;
- nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
- data->timeo, data->retrans);
- if (data->flags & NFS_MOUNT_NORESVPORT)
+ nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
+ ctx->timeo, ctx->retrans);
+ if (ctx->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
@@ -690,46 +694,46 @@ static int nfs_init_server(struct nfs_server *server,
server->nfs_client = clp;
/* Initialise the client representation from the mount data */
- server->flags = data->flags;
- server->options = data->options;
+ server->flags = ctx->flags;
+ server->options = ctx->options;
server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
- if (data->rsize)
- server->rsize = nfs_block_size(data->rsize, NULL);
- if (data->wsize)
- server->wsize = nfs_block_size(data->wsize, NULL);
+ if (ctx->rsize)
+ server->rsize = nfs_block_size(ctx->rsize, NULL);
+ if (ctx->wsize)
+ server->wsize = nfs_block_size(ctx->wsize, NULL);
- server->acregmin = data->acregmin * HZ;
- server->acregmax = data->acregmax * HZ;
- server->acdirmin = data->acdirmin * HZ;
- server->acdirmax = data->acdirmax * HZ;
+ server->acregmin = ctx->acregmin * HZ;
+ server->acregmax = ctx->acregmax * HZ;
+ server->acdirmin = ctx->acdirmin * HZ;
+ server->acdirmax = ctx->acdirmax * HZ;
/* Start lockd here, before we might error out */
error = nfs_start_lockd(server);
if (error < 0)
goto error;
- server->port = data->nfs_server.port;
- server->auth_info = data->auth_info;
+ server->port = ctx->nfs_server.port;
+ server->auth_info = ctx->auth_info;
error = nfs_init_server_rpcclient(server, &timeparms,
- data->selected_flavor);
+ ctx->selected_flavor);
if (error < 0)
goto error;
/* Preserve the values of mount_server-related mount options */
- if (data->mount_server.addrlen) {
- memcpy(&server->mountd_address, &data->mount_server.address,
- data->mount_server.addrlen);
- server->mountd_addrlen = data->mount_server.addrlen;
+ if (ctx->mount_server.addrlen) {
+ memcpy(&server->mountd_address, &ctx->mount_server.address,
+ ctx->mount_server.addrlen);
+ server->mountd_addrlen = ctx->mount_server.addrlen;
}
- server->mountd_version = data->mount_server.version;
- server->mountd_port = data->mount_server.port;
- server->mountd_protocol = data->mount_server.protocol;
+ server->mountd_version = ctx->mount_server.version;
+ server->mountd_port = ctx->mount_server.port;
+ server->mountd_protocol = ctx->mount_server.protocol;
- server->namelen = data->namlen;
+ server->namelen = ctx->namlen;
return 0;
error:
@@ -951,9 +955,9 @@ EXPORT_SYMBOL_GPL(nfs_free_server);
* Create a version 2 or 3 volume record
* - keyed on server and FSID
*/
-struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs_create_server(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_server *server;
struct nfs_fattr *fattr;
int error;
@@ -970,18 +974,18 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
goto error;
/* Get a client representation */
- error = nfs_init_server(server, mount_info->parsed, nfs_mod);
+ error = nfs_init_server(server, fc);
if (error < 0)
goto error;
/* Probe the root fh to retrieve its FSID */
- error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr);
+ error = nfs_probe_fsinfo(server, ctx->mntfh, fattr);
if (error < 0)
goto error;
if (server->nfs_client->rpc_ops->version == 3) {
if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
server->namelen = NFS3_MAXNAMLEN;
- if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS))
+ if (!(ctx->flags & NFS_MOUNT_NORDIRPLUS))
server->caps |= NFS_CAP_READDIRPLUS;
} else {
if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
@@ -989,8 +993,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
}
if (!(fattr->valid & NFS_ATTR_FATTR)) {
- error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh,
- fattr, NULL, NULL);
+ error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
+ fattr, NULL, NULL);
if (error < 0) {
dprintk("nfs_create_server: getattr error = %d\n", -error);
goto error;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index fe57b2b5314a..4a841071d8a7 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -25,13 +25,32 @@
#include "internal.h"
#include "nfs4trace.h"
-static void nfs_free_delegation(struct nfs_delegation *delegation)
+#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U)
+
+static atomic_long_t nfs_active_delegations;
+static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
+
+static void __nfs_free_delegation(struct nfs_delegation *delegation)
{
put_cred(delegation->cred);
delegation->cred = NULL;
kfree_rcu(delegation, rcu);
}
+static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation)
+{
+ if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+ delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
+ atomic_long_dec(&nfs_active_delegations);
+ }
+}
+
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+ nfs_mark_delegation_revoked(delegation);
+ __nfs_free_delegation(delegation);
+}
+
/**
* nfs_mark_delegation_referenced - set delegation's REFERENCED flag
* @delegation: delegation to process
@@ -343,7 +362,8 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
delegation->stateid.seqid = update->stateid.seqid;
smp_wmb();
delegation->type = update->type;
- clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
+ if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ atomic_long_inc(&nfs_active_delegations);
}
}
@@ -423,6 +443,8 @@ add_new:
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
+ atomic_long_inc(&nfs_active_delegations);
+
trace_nfs4_set_delegation(inode, type);
spin_lock(&inode->i_lock);
@@ -432,7 +454,7 @@ add_new:
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
- nfs_free_delegation(delegation);
+ __nfs_free_delegation(delegation);
if (freeme != NULL) {
nfs_do_return_delegation(inode, freeme, 0);
nfs_free_delegation(freeme);
@@ -479,7 +501,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
- if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) {
+ else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) {
struct inode *inode;
spin_lock(&delegation->lock);
@@ -488,6 +510,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
ret = true;
spin_unlock(&delegation->lock);
}
+ if (ret)
+ clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
ret = false;
@@ -607,6 +631,7 @@ void nfs_inode_evict_delegation(struct inode *inode)
delegation = nfs_inode_detach_delegation(inode);
if (delegation != NULL) {
+ set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
nfs_do_return_delegation(inode, delegation, 1);
nfs_free_delegation(delegation);
@@ -637,6 +662,40 @@ int nfs4_inode_return_delegation(struct inode *inode)
}
/**
+ * nfs_inode_return_delegation_on_close - asynchronously return a delegation
+ * @inode: inode to process
+ *
+ * This routine is called on file close in order to determine if the
+ * inode delegation needs to be returned immediately.
+ */
+void nfs4_inode_return_delegation_on_close(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ struct nfs_delegation *ret = NULL;
+
+ if (!inode)
+ return;
+ rcu_read_lock();
+ delegation = nfs4_get_valid_delegation(inode);
+ if (!delegation)
+ goto out;
+ if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) ||
+ atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) {
+ spin_lock(&delegation->lock);
+ if (delegation->inode &&
+ list_empty(&NFS_I(inode)->open_files) &&
+ !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags);
+ ret = delegation;
+ }
+ spin_unlock(&delegation->lock);
+ }
+out:
+ rcu_read_unlock();
+ nfs_end_delegation_return(inode, ret, 0);
+}
+
+/**
* nfs4_inode_make_writeable
* @inode: pointer to inode
*
@@ -760,13 +819,6 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl
rcu_read_unlock();
}
-static void nfs_mark_delegation_revoked(struct nfs_server *server,
- struct nfs_delegation *delegation)
-{
- set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
- delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
-}
-
static void nfs_revoke_delegation(struct inode *inode,
const nfs4_stateid *stateid)
{
@@ -794,7 +846,7 @@ static void nfs_revoke_delegation(struct inode *inode,
}
spin_unlock(&delegation->lock);
}
- nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
+ nfs_mark_delegation_revoked(delegation);
ret = true;
out:
rcu_read_unlock();
@@ -833,7 +885,7 @@ void nfs_delegation_mark_returned(struct inode *inode,
delegation->stateid.seqid = stateid->seqid;
}
- nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
+ nfs_mark_delegation_revoked(delegation);
out_clear_returning:
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
@@ -1317,3 +1369,5 @@ out:
rcu_read_unlock();
return ret;
}
+
+module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 15d3484be028..31b84604d383 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -42,6 +42,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
int nfs4_inode_return_delegation(struct inode *inode);
+void nfs4_inode_return_delegation_on_close(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_evict_delegation(struct inode *inode);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e180033e35cf..1320288ff9ec 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -58,7 +58,7 @@ static void nfs_readdir_clear_array(struct page*);
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
.read = generic_read_dir,
- .iterate = nfs_readdir,
+ .iterate_shared = nfs_readdir,
.open = nfs_opendir,
.release = nfs_closedir,
.fsync = nfs_fsync_dir,
@@ -162,6 +162,17 @@ typedef struct {
bool eof;
} nfs_readdir_descriptor_t;
+static
+void nfs_readdir_init_array(struct page *page)
+{
+ struct nfs_cache_array *array;
+
+ array = kmap_atomic(page);
+ memset(array, 0, sizeof(struct nfs_cache_array));
+ array->eof_index = -1;
+ kunmap_atomic(array);
+}
+
/*
* we are freeing strings created by nfs_add_to_readdir_array()
*/
@@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page)
array = kmap_atomic(page);
for (i = 0; i < array->size; i++)
kfree(array->array[i].string.name);
+ array->size = 0;
kunmap_atomic(array);
}
@@ -186,7 +198,7 @@ static
int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
{
string->len = len;
- string->name = kmemdup(name, len, GFP_KERNEL);
+ string->name = kmemdup_nul(name, len, GFP_KERNEL);
if (string->name == NULL)
return -ENOMEM;
/*
@@ -437,7 +449,8 @@ void nfs_force_use_readdirplus(struct inode *dir)
if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
!list_empty(&nfsi->open_files)) {
set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
- invalidate_mapping_pages(dir->i_mapping, 0, -1);
+ invalidate_mapping_pages(dir->i_mapping,
+ nfsi->page_index + 1, -1);
}
}
@@ -610,6 +623,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
int status = -ENOMEM;
unsigned int array_size = ARRAY_SIZE(pages);
+ nfs_readdir_init_array(page);
+
entry.prev_cookie = 0;
entry.cookie = desc->last_cookie;
entry.eof = 0;
@@ -626,8 +641,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
}
array = kmap(page);
- memset(array, 0, sizeof(struct nfs_cache_array));
- array->eof_index = -1;
status = nfs_readdir_alloc_pages(pages, array_size);
if (status < 0)
@@ -682,6 +695,7 @@ int nfs_readdir_filler(void *data, struct page* page)
unlock_page(page);
return 0;
error:
+ nfs_readdir_clear_array(page);
unlock_page(page);
return ret;
}
@@ -689,8 +703,6 @@ int nfs_readdir_filler(void *data, struct page* page)
static
void cache_page_release(nfs_readdir_descriptor_t *desc)
{
- if (!desc->page->mapping)
- nfs_readdir_clear_array(desc->page);
put_page(desc->page);
desc->page = NULL;
}
@@ -704,19 +716,32 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
/*
* Returns 0 if desc->dir_cookie was found on page desc->page_index
+ * and locks the page to prevent removal from the page cache.
*/
static
-int find_cache_page(nfs_readdir_descriptor_t *desc)
+int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
{
+ struct inode *inode = file_inode(desc->file);
+ struct nfs_inode *nfsi = NFS_I(inode);
int res;
desc->page = get_cache_page(desc);
if (IS_ERR(desc->page))
return PTR_ERR(desc->page);
-
- res = nfs_readdir_search_array(desc);
+ res = lock_page_killable(desc->page);
if (res != 0)
- cache_page_release(desc);
+ goto error;
+ res = -EAGAIN;
+ if (desc->page->mapping != NULL) {
+ res = nfs_readdir_search_array(desc);
+ if (res == 0) {
+ nfsi->page_index = desc->page_index;
+ return 0;
+ }
+ }
+ unlock_page(desc->page);
+error:
+ cache_page_release(desc);
return res;
}
@@ -731,7 +756,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
desc->last_cookie = 0;
}
do {
- res = find_cache_page(desc);
+ res = find_and_lock_cache_page(desc);
} while (res == -EAGAIN);
return res;
}
@@ -770,7 +795,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
desc->eof = true;
kunmap(desc->page);
- cache_page_release(desc);
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
(unsigned long long)*desc->dir_cookie, res);
return res;
@@ -816,13 +840,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
status = nfs_do_filldir(desc);
+ out_release:
+ nfs_readdir_clear_array(desc->page);
+ cache_page_release(desc);
out:
dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
__func__, status);
return status;
- out_release:
- cache_page_release(desc);
- goto out;
}
/* The file offset position represents the dirent entry number. A
@@ -887,6 +911,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;
res = nfs_do_filldir(desc);
+ unlock_page(desc->page);
+ cache_page_release(desc);
if (res < 0)
break;
} while (!desc->eof);
@@ -1142,10 +1168,17 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
if (fhandle == NULL || fattr == NULL || IS_ERR(label))
goto out;
- ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
if (ret < 0) {
- if (ret == -ESTALE || ret == -ENOENT)
+ switch (ret) {
+ case -ESTALE:
+ case -ENOENT:
ret = 0;
+ break;
+ case -ETIMEDOUT:
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+ ret = 1;
+ }
goto out;
}
ret = 0;
@@ -1408,7 +1441,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
goto out;
trace_nfs_lookup_enter(dir, dentry, flags);
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
if (error == -ENOENT)
goto no_entry;
if (error < 0) {
@@ -1683,7 +1716,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
d_drop(dentry);
if (fhandle->size == 0) {
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
+ error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL);
if (error)
goto out_error;
}
@@ -2312,11 +2345,11 @@ static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, s
/* Found an entry, is our attribute cache valid? */
if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
break;
+ if (!retry)
+ break;
err = -ECHILD;
if (!may_block)
goto out;
- if (!retry)
- goto out_zap;
spin_unlock(&inode->i_lock);
err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (err)
@@ -2353,7 +2386,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
cache = list_entry(lh, struct nfs_access_entry, lru);
if (lh == &nfsi->access_cache_entry_lru ||
- cred != cache->cred)
+ cred_fscmp(cred, cache->cred) != 0)
cache = NULL;
if (cache == NULL)
goto out;
@@ -2476,7 +2509,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
{
struct nfs_access_entry cache;
bool may_block = (mask & MAY_NOT_BLOCK) == 0;
- int cache_mask;
+ int cache_mask = -1;
int status;
trace_nfs_access_enter(inode);
@@ -2515,7 +2548,7 @@ out_cached:
if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
status = -EACCES;
out:
- trace_nfs_access_exit(inode, status);
+ trace_nfs_access_exit(inode, mask, cache_mask, status);
return status;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 040a50fd9bf3..b768a0b42e82 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -245,10 +245,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
data->ds_commit_index);
/* verifier not set so always fail */
- if (verfp->committed < 0)
+ if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE)
return 1;
- return nfs_direct_cmp_verf(verfp, &data->verf);
+ return nfs_direct_cmp_verf(verfp, data->res.verf);
}
/**
@@ -824,7 +824,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
/* fake unstable write to let common nfs resend pages */
hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = hdr->args.count;
+ hdr->good_bytes = hdr->args.offset + hdr->args.count -
+ hdr->io_start;
}
spin_unlock(&dreq->lock);
}
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index aec769a500a1..89bd5581f317 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -93,7 +93,7 @@ static void nfs_dns_ent_init(struct cache_head *cnew,
key = container_of(ckey, struct nfs_dns_ent, h);
kfree(new->hostname);
- new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
+ new->hostname = kmemdup_nul(key->hostname, key->namelen, GFP_KERNEL);
if (new->hostname) {
new->namelen = key->namelen;
nfs_dns_ent_update(cnew, ckey);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8eb731d9be3e..f96367a2463e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -204,44 +204,39 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
static int
nfs_file_fsync_commit(struct file *file, int datasync)
{
- struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
- int do_resend, status;
- int ret = 0;
+ int ret;
dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
- status = nfs_commit_inode(inode, FLUSH_SYNC);
- if (status == 0)
- status = file_check_and_advance_wb_err(file);
- if (status < 0) {
- ret = status;
- goto out;
- }
- do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
- if (do_resend)
- ret = -EAGAIN;
-out:
- return ret;
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (ret < 0)
+ return ret;
+ return file_check_and_advance_wb_err(file);
}
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- int ret;
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
+ int ret;
trace_nfs_fsync_enter(inode);
- do {
+ for (;;) {
ret = file_write_and_wait_range(file, start, end);
if (ret != 0)
break;
ret = nfs_file_fsync_commit(file, datasync);
- if (!ret)
- ret = pnfs_sync_inode(inode, !!datasync);
+ if (ret != 0)
+ break;
+ ret = pnfs_sync_inode(inode, !!datasync);
+ if (ret != 0)
+ break;
+ if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+ break;
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
@@ -249,7 +244,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
*/
start = 0;
end = LLONG_MAX;
- } while (ret == -EAGAIN);
+ }
trace_nfs_fsync_exit(inode, ret);
return ret;
@@ -489,7 +484,19 @@ static int nfs_launder_page(struct page *page)
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
+ unsigned long blocks;
+ long long isize;
struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
+ struct inode *inode = file->f_mapping->host;
+
+ spin_lock(&inode->i_lock);
+ blocks = inode->i_blocks;
+ isize = inode->i_size;
+ spin_unlock(&inode->i_lock);
+ if (blocks*512 < isize) {
+ pr_warn("swap activate: swapfile has holes\n");
+ return -EINVAL;
+ }
*span = sis->pages;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 5657b7f2611f..bb9148b83166 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1266,9 +1266,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
int idx, u64 offset, u64 length,
- u32 status, int opnum, int error)
+ u32 *op_status, int opnum, int error)
{
struct nfs4_ff_layout_mirror *mirror;
+ u32 status = *op_status;
int err;
if (status == 0) {
@@ -1286,10 +1287,10 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -ENOBUFS:
case -EPIPE:
case -EPERM:
- status = NFS4ERR_NXIO;
+ *op_status = status = NFS4ERR_NXIO;
break;
case -EACCES:
- status = NFS4ERR_ACCESS;
+ *op_status = status = NFS4ERR_ACCESS;
break;
default:
return;
@@ -1321,16 +1322,19 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
int new_idx = hdr->pgio_mirror_idx;
int err;
- trace_nfs4_pnfs_read(hdr, task->tk_status);
- if (task->tk_status < 0)
+ if (task->tk_status < 0) {
ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
hdr->args.offset, hdr->args.count,
- hdr->res.op_status, OP_READ,
+ &hdr->res.op_status, OP_READ,
task->tk_status);
+ trace_ff_layout_read_error(hdr);
+ }
+
err = ff_layout_async_handle_error(task, hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
+ trace_nfs4_pnfs_read(hdr, err);
clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
switch (err) {
@@ -1494,16 +1498,19 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
loff_t end_offs = 0;
int err;
- trace_nfs4_pnfs_write(hdr, task->tk_status);
- if (task->tk_status < 0)
+ if (task->tk_status < 0) {
ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx,
hdr->args.offset, hdr->args.count,
- hdr->res.op_status, OP_WRITE,
+ &hdr->res.op_status, OP_WRITE,
task->tk_status);
+ trace_ff_layout_write_error(hdr);
+ }
+
err = ff_layout_async_handle_error(task, hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
+ trace_nfs4_pnfs_write(hdr, err);
clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
switch (err) {
@@ -1537,15 +1544,18 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
{
int err;
- trace_nfs4_pnfs_commit_ds(data, task->tk_status);
- if (task->tk_status < 0)
+ if (task->tk_status < 0) {
ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index,
data->args.offset, data->args.count,
- data->res.op_status, OP_COMMIT,
+ &data->res.op_status, OP_COMMIT,
task->tk_status);
+ trace_ff_layout_commit_error(data);
+ }
+
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
data->lseg, data->ds_commit_index);
+ trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
pnfs_generic_prepare_to_resend_writes(data);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
new file mode 100644
index 000000000000..e1b938457ab9
--- /dev/null
+++ b/fs/nfs/fs_context.c
@@ -0,0 +1,1440 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/fs/nfs/fs_context.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ * Conversion to new mount api Copyright (C) David Howells
+ *
+ * NFS mount handling.
+ *
+ * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com>
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include "nfs.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY NFSDBG_MOUNT
+
+#if IS_ENABLED(CONFIG_NFS_V3)
+#define NFS_DEFAULT_VERSION 3
+#else
+#define NFS_DEFAULT_VERSION 2
+#endif
+
+#define NFS_MAX_CONNECTIONS 16
+
+enum nfs_param {
+ Opt_ac,
+ Opt_acdirmax,
+ Opt_acdirmin,
+ Opt_acl,
+ Opt_acregmax,
+ Opt_acregmin,
+ Opt_actimeo,
+ Opt_addr,
+ Opt_bg,
+ Opt_bsize,
+ Opt_clientaddr,
+ Opt_cto,
+ Opt_fg,
+ Opt_fscache,
+ Opt_fscache_flag,
+ Opt_hard,
+ Opt_intr,
+ Opt_local_lock,
+ Opt_lock,
+ Opt_lookupcache,
+ Opt_migration,
+ Opt_minorversion,
+ Opt_mountaddr,
+ Opt_mounthost,
+ Opt_mountport,
+ Opt_mountproto,
+ Opt_mountvers,
+ Opt_namelen,
+ Opt_nconnect,
+ Opt_port,
+ Opt_posix,
+ Opt_proto,
+ Opt_rdirplus,
+ Opt_rdma,
+ Opt_resvport,
+ Opt_retrans,
+ Opt_retry,
+ Opt_rsize,
+ Opt_sec,
+ Opt_sharecache,
+ Opt_sloppy,
+ Opt_soft,
+ Opt_softerr,
+ Opt_softreval,
+ Opt_source,
+ Opt_tcp,
+ Opt_timeo,
+ Opt_udp,
+ Opt_v,
+ Opt_vers,
+ Opt_wsize,
+};
+
+enum {
+ Opt_local_lock_all,
+ Opt_local_lock_flock,
+ Opt_local_lock_none,
+ Opt_local_lock_posix,
+};
+
+static const struct constant_table nfs_param_enums_local_lock[] = {
+ { "all", Opt_local_lock_all },
+ { "flock", Opt_local_lock_flock },
+ { "none", Opt_local_lock_none },
+ {}
+};
+
+enum {
+ Opt_lookupcache_all,
+ Opt_lookupcache_none,
+ Opt_lookupcache_positive,
+};
+
+static const struct constant_table nfs_param_enums_lookupcache[] = {
+ { "all", Opt_lookupcache_all },
+ { "none", Opt_lookupcache_none },
+ { "pos", Opt_lookupcache_positive },
+ { "positive", Opt_lookupcache_positive },
+ {}
+};
+
+static const struct fs_parameter_spec nfs_fs_parameters[] = {
+ fsparam_flag_no("ac", Opt_ac),
+ fsparam_u32 ("acdirmax", Opt_acdirmax),
+ fsparam_u32 ("acdirmin", Opt_acdirmin),
+ fsparam_flag_no("acl", Opt_acl),
+ fsparam_u32 ("acregmax", Opt_acregmax),
+ fsparam_u32 ("acregmin", Opt_acregmin),
+ fsparam_u32 ("actimeo", Opt_actimeo),
+ fsparam_string("addr", Opt_addr),
+ fsparam_flag ("bg", Opt_bg),
+ fsparam_u32 ("bsize", Opt_bsize),
+ fsparam_string("clientaddr", Opt_clientaddr),
+ fsparam_flag_no("cto", Opt_cto),
+ fsparam_flag ("fg", Opt_fg),
+ fsparam_flag_no("fsc", Opt_fscache_flag),
+ fsparam_string("fsc", Opt_fscache),
+ fsparam_flag ("hard", Opt_hard),
+ __fsparam(NULL, "intr", Opt_intr,
+ fs_param_neg_with_no|fs_param_deprecated, NULL),
+ fsparam_enum ("local_lock", Opt_local_lock, nfs_param_enums_local_lock),
+ fsparam_flag_no("lock", Opt_lock),
+ fsparam_enum ("lookupcache", Opt_lookupcache, nfs_param_enums_lookupcache),
+ fsparam_flag_no("migration", Opt_migration),
+ fsparam_u32 ("minorversion", Opt_minorversion),
+ fsparam_string("mountaddr", Opt_mountaddr),
+ fsparam_string("mounthost", Opt_mounthost),
+ fsparam_u32 ("mountport", Opt_mountport),
+ fsparam_string("mountproto", Opt_mountproto),
+ fsparam_u32 ("mountvers", Opt_mountvers),
+ fsparam_u32 ("namlen", Opt_namelen),
+ fsparam_u32 ("nconnect", Opt_nconnect),
+ fsparam_string("nfsvers", Opt_vers),
+ fsparam_u32 ("port", Opt_port),
+ fsparam_flag_no("posix", Opt_posix),
+ fsparam_string("proto", Opt_proto),
+ fsparam_flag_no("rdirplus", Opt_rdirplus),
+ fsparam_flag ("rdma", Opt_rdma),
+ fsparam_flag_no("resvport", Opt_resvport),
+ fsparam_u32 ("retrans", Opt_retrans),
+ fsparam_string("retry", Opt_retry),
+ fsparam_u32 ("rsize", Opt_rsize),
+ fsparam_string("sec", Opt_sec),
+ fsparam_flag_no("sharecache", Opt_sharecache),
+ fsparam_flag ("sloppy", Opt_sloppy),
+ fsparam_flag ("soft", Opt_soft),
+ fsparam_flag ("softerr", Opt_softerr),
+ fsparam_flag ("softreval", Opt_softreval),
+ fsparam_string("source", Opt_source),
+ fsparam_flag ("tcp", Opt_tcp),
+ fsparam_u32 ("timeo", Opt_timeo),
+ fsparam_flag ("udp", Opt_udp),
+ fsparam_flag ("v2", Opt_v),
+ fsparam_flag ("v3", Opt_v),
+ fsparam_flag ("v4", Opt_v),
+ fsparam_flag ("v4.0", Opt_v),
+ fsparam_flag ("v4.1", Opt_v),
+ fsparam_flag ("v4.2", Opt_v),
+ fsparam_string("vers", Opt_vers),
+ fsparam_u32 ("wsize", Opt_wsize),
+ {}
+};
+
+enum {
+ Opt_vers_2,
+ Opt_vers_3,
+ Opt_vers_4,
+ Opt_vers_4_0,
+ Opt_vers_4_1,
+ Opt_vers_4_2,
+};
+
+static const struct constant_table nfs_vers_tokens[] = {
+ { "2", Opt_vers_2 },
+ { "3", Opt_vers_3 },
+ { "4", Opt_vers_4 },
+ { "4.0", Opt_vers_4_0 },
+ { "4.1", Opt_vers_4_1 },
+ { "4.2", Opt_vers_4_2 },
+};
+
+enum {
+ Opt_xprt_rdma,
+ Opt_xprt_rdma6,
+ Opt_xprt_tcp,
+ Opt_xprt_tcp6,
+ Opt_xprt_udp,
+ Opt_xprt_udp6,
+ nr__Opt_xprt
+};
+
+static const struct constant_table nfs_xprt_protocol_tokens[nr__Opt_xprt] = {
+ { "rdma", Opt_xprt_rdma },
+ { "rdma6", Opt_xprt_rdma6 },
+ { "tcp", Opt_xprt_tcp },
+ { "tcp6", Opt_xprt_tcp6 },
+ { "udp", Opt_xprt_udp },
+ { "udp6", Opt_xprt_udp6 },
+};
+
+enum {
+ Opt_sec_krb5,
+ Opt_sec_krb5i,
+ Opt_sec_krb5p,
+ Opt_sec_lkey,
+ Opt_sec_lkeyi,
+ Opt_sec_lkeyp,
+ Opt_sec_none,
+ Opt_sec_spkm,
+ Opt_sec_spkmi,
+ Opt_sec_spkmp,
+ Opt_sec_sys,
+ nr__Opt_sec
+};
+
+static const struct constant_table nfs_secflavor_tokens[] = {
+ { "krb5", Opt_sec_krb5 },
+ { "krb5i", Opt_sec_krb5i },
+ { "krb5p", Opt_sec_krb5p },
+ { "lkey", Opt_sec_lkey },
+ { "lkeyi", Opt_sec_lkeyi },
+ { "lkeyp", Opt_sec_lkeyp },
+ { "none", Opt_sec_none },
+ { "null", Opt_sec_none },
+ { "spkm3", Opt_sec_spkm },
+ { "spkm3i", Opt_sec_spkmi },
+ { "spkm3p", Opt_sec_spkmp },
+ { "sys", Opt_sec_sys },
+};
+
+/*
+ * Sanity-check a server address provided by the mount command.
+ *
+ * Address family must be initialized, and address must not be
+ * the ANY address for that family.
+ */
+static int nfs_verify_server_address(struct sockaddr *addr)
+{
+ switch (addr->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *sa = (struct sockaddr_in *)addr;
+ return sa->sin_addr.s_addr != htonl(INADDR_ANY);
+ }
+ case AF_INET6: {
+ struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ return !ipv6_addr_any(sa);
+ }
+ }
+
+ dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
+ return 0;
+}
+
+/*
+ * Sanity check the NFS transport protocol.
+ *
+ */
+static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
+{
+ switch (ctx->nfs_server.protocol) {
+ case XPRT_TRANSPORT_UDP:
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_RDMA:
+ break;
+ default:
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ }
+}
+
+/*
+ * For text based NFSv2/v3 mounts, the mount protocol transport default
+ * settings should depend upon the specified NFS transport.
+ */
+static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx)
+{
+ nfs_validate_transport_protocol(ctx);
+
+ if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP ||
+ ctx->mount_server.protocol == XPRT_TRANSPORT_TCP)
+ return;
+ switch (ctx->nfs_server.protocol) {
+ case XPRT_TRANSPORT_UDP:
+ ctx->mount_server.protocol = XPRT_TRANSPORT_UDP;
+ break;
+ case XPRT_TRANSPORT_TCP:
+ case XPRT_TRANSPORT_RDMA:
+ ctx->mount_server.protocol = XPRT_TRANSPORT_TCP;
+ }
+}
+
+/*
+ * Add 'flavor' to 'auth_info' if not already present.
+ * Returns true if 'flavor' ends up in the list, false otherwise
+ */
+static int nfs_auth_info_add(struct fs_context *fc,
+ struct nfs_auth_info *auth_info,
+ rpc_authflavor_t flavor)
+{
+ unsigned int i;
+ unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
+
+ /* make sure this flavor isn't already in the list */
+ for (i = 0; i < auth_info->flavor_len; i++) {
+ if (flavor == auth_info->flavors[i])
+ return 0;
+ }
+
+ if (auth_info->flavor_len + 1 >= max_flavor_len)
+ return nfs_invalf(fc, "NFS: too many sec= flavors");
+
+ auth_info->flavors[auth_info->flavor_len++] = flavor;
+ return 0;
+}
+
+/*
+ * Parse the value of the 'sec=' option.
+ */
+static int nfs_parse_security_flavors(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ rpc_authflavor_t pseudoflavor;
+ char *string = param->string, *p;
+ int ret;
+
+ dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string);
+
+ while ((p = strsep(&string, ":")) != NULL) {
+ if (!*p)
+ continue;
+ switch (lookup_constant(nfs_secflavor_tokens, p, -1)) {
+ case Opt_sec_none:
+ pseudoflavor = RPC_AUTH_NULL;
+ break;
+ case Opt_sec_sys:
+ pseudoflavor = RPC_AUTH_UNIX;
+ break;
+ case Opt_sec_krb5:
+ pseudoflavor = RPC_AUTH_GSS_KRB5;
+ break;
+ case Opt_sec_krb5i:
+ pseudoflavor = RPC_AUTH_GSS_KRB5I;
+ break;
+ case Opt_sec_krb5p:
+ pseudoflavor = RPC_AUTH_GSS_KRB5P;
+ break;
+ case Opt_sec_lkey:
+ pseudoflavor = RPC_AUTH_GSS_LKEY;
+ break;
+ case Opt_sec_lkeyi:
+ pseudoflavor = RPC_AUTH_GSS_LKEYI;
+ break;
+ case Opt_sec_lkeyp:
+ pseudoflavor = RPC_AUTH_GSS_LKEYP;
+ break;
+ case Opt_sec_spkm:
+ pseudoflavor = RPC_AUTH_GSS_SPKM;
+ break;
+ case Opt_sec_spkmi:
+ pseudoflavor = RPC_AUTH_GSS_SPKMI;
+ break;
+ case Opt_sec_spkmp:
+ pseudoflavor = RPC_AUTH_GSS_SPKMP;
+ break;
+ default:
+ return nfs_invalf(fc, "NFS: sec=%s option not recognized", p);
+ }
+
+ ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int nfs_parse_version_string(struct fs_context *fc,
+ const char *string)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+
+ ctx->flags &= ~NFS_MOUNT_VER3;
+ switch (lookup_constant(nfs_vers_tokens, string, -1)) {
+ case Opt_vers_2:
+ ctx->version = 2;
+ break;
+ case Opt_vers_3:
+ ctx->flags |= NFS_MOUNT_VER3;
+ ctx->version = 3;
+ break;
+ case Opt_vers_4:
+ /* Backward compatibility option. In future,
+ * the mount program should always supply
+ * a NFSv4 minor version number.
+ */
+ ctx->version = 4;
+ break;
+ case Opt_vers_4_0:
+ ctx->version = 4;
+ ctx->minorversion = 0;
+ break;
+ case Opt_vers_4_1:
+ ctx->version = 4;
+ ctx->minorversion = 1;
+ break;
+ case Opt_vers_4_2:
+ ctx->version = 4;
+ ctx->minorversion = 2;
+ break;
+ default:
+ return nfs_invalf(fc, "NFS: Unsupported NFS version");
+ }
+ return 0;
+}
+
+/*
+ * Parse a single mount parameter.
+ */
+static int nfs_fs_context_parse_param(struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct fs_parse_result result;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ unsigned short protofamily, mountfamily;
+ unsigned int len;
+ int ret, opt;
+
+ dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key);
+
+ opt = fs_parse(fc, nfs_fs_parameters, param, &result);
+ if (opt < 0)
+ return ctx->sloppy ? 1 : opt;
+
+ switch (opt) {
+ case Opt_source:
+ if (fc->source)
+ return nfs_invalf(fc, "NFS: Multiple sources not supported");
+ fc->source = param->string;
+ param->string = NULL;
+ break;
+
+ /*
+ * boolean options: foo/nofoo
+ */
+ case Opt_soft:
+ ctx->flags |= NFS_MOUNT_SOFT;
+ ctx->flags &= ~NFS_MOUNT_SOFTERR;
+ break;
+ case Opt_softerr:
+ ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL;
+ ctx->flags &= ~NFS_MOUNT_SOFT;
+ break;
+ case Opt_hard:
+ ctx->flags &= ~(NFS_MOUNT_SOFT |
+ NFS_MOUNT_SOFTERR |
+ NFS_MOUNT_SOFTREVAL);
+ break;
+ case Opt_softreval:
+ if (result.negated)
+ ctx->flags &= ~NFS_MOUNT_SOFTREVAL;
+ else
+ ctx->flags &= NFS_MOUNT_SOFTREVAL;
+ break;
+ case Opt_posix:
+ if (result.negated)
+ ctx->flags &= ~NFS_MOUNT_POSIX;
+ else
+ ctx->flags |= NFS_MOUNT_POSIX;
+ break;
+ case Opt_cto:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NOCTO;
+ else
+ ctx->flags &= ~NFS_MOUNT_NOCTO;
+ break;
+ case Opt_ac:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NOAC;
+ else
+ ctx->flags &= ~NFS_MOUNT_NOAC;
+ break;
+ case Opt_lock:
+ if (result.negated) {
+ ctx->flags |= NFS_MOUNT_NONLM;
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ } else {
+ ctx->flags &= ~NFS_MOUNT_NONLM;
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL);
+ }
+ break;
+ case Opt_udp:
+ ctx->flags &= ~NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+ break;
+ case Opt_tcp:
+ ctx->flags |= NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ break;
+ case Opt_rdma:
+ ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
+ xprt_load_transport(param->key);
+ break;
+ case Opt_acl:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NOACL;
+ else
+ ctx->flags &= ~NFS_MOUNT_NOACL;
+ break;
+ case Opt_rdirplus:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NORDIRPLUS;
+ else
+ ctx->flags &= ~NFS_MOUNT_NORDIRPLUS;
+ break;
+ case Opt_sharecache:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_UNSHARED;
+ else
+ ctx->flags &= ~NFS_MOUNT_UNSHARED;
+ break;
+ case Opt_resvport:
+ if (result.negated)
+ ctx->flags |= NFS_MOUNT_NORESVPORT;
+ else
+ ctx->flags &= ~NFS_MOUNT_NORESVPORT;
+ break;
+ case Opt_fscache_flag:
+ if (result.negated)
+ ctx->options &= ~NFS_OPTION_FSCACHE;
+ else
+ ctx->options |= NFS_OPTION_FSCACHE;
+ kfree(ctx->fscache_uniq);
+ ctx->fscache_uniq = NULL;
+ break;
+ case Opt_fscache:
+ ctx->options |= NFS_OPTION_FSCACHE;
+ kfree(ctx->fscache_uniq);
+ ctx->fscache_uniq = param->string;
+ param->string = NULL;
+ break;
+ case Opt_migration:
+ if (result.negated)
+ ctx->options &= ~NFS_OPTION_MIGRATION;
+ else
+ ctx->options |= NFS_OPTION_MIGRATION;
+ break;
+
+ /*
+ * options that take numeric values
+ */
+ case Opt_port:
+ if (result.uint_32 > USHRT_MAX)
+ goto out_of_bounds;
+ ctx->nfs_server.port = result.uint_32;
+ break;
+ case Opt_rsize:
+ ctx->rsize = result.uint_32;
+ break;
+ case Opt_wsize:
+ ctx->wsize = result.uint_32;
+ break;
+ case Opt_bsize:
+ ctx->bsize = result.uint_32;
+ break;
+ case Opt_timeo:
+ if (result.uint_32 < 1 || result.uint_32 > INT_MAX)
+ goto out_of_bounds;
+ ctx->timeo = result.uint_32;
+ break;
+ case Opt_retrans:
+ if (result.uint_32 > INT_MAX)
+ goto out_of_bounds;
+ ctx->retrans = result.uint_32;
+ break;
+ case Opt_acregmin:
+ ctx->acregmin = result.uint_32;
+ break;
+ case Opt_acregmax:
+ ctx->acregmax = result.uint_32;
+ break;
+ case Opt_acdirmin:
+ ctx->acdirmin = result.uint_32;
+ break;
+ case Opt_acdirmax:
+ ctx->acdirmax = result.uint_32;
+ break;
+ case Opt_actimeo:
+ ctx->acregmin = result.uint_32;
+ ctx->acregmax = result.uint_32;
+ ctx->acdirmin = result.uint_32;
+ ctx->acdirmax = result.uint_32;
+ break;
+ case Opt_namelen:
+ ctx->namlen = result.uint_32;
+ break;
+ case Opt_mountport:
+ if (result.uint_32 > USHRT_MAX)
+ goto out_of_bounds;
+ ctx->mount_server.port = result.uint_32;
+ break;
+ case Opt_mountvers:
+ if (result.uint_32 < NFS_MNT_VERSION ||
+ result.uint_32 > NFS_MNT3_VERSION)
+ goto out_of_bounds;
+ ctx->mount_server.version = result.uint_32;
+ break;
+ case Opt_minorversion:
+ if (result.uint_32 > NFS4_MAX_MINOR_VERSION)
+ goto out_of_bounds;
+ ctx->minorversion = result.uint_32;
+ break;
+
+ /*
+ * options that take text values
+ */
+ case Opt_v:
+ ret = nfs_parse_version_string(fc, param->key + 1);
+ if (ret < 0)
+ return ret;
+ break;
+ case Opt_vers:
+ ret = nfs_parse_version_string(fc, param->string);
+ if (ret < 0)
+ return ret;
+ break;
+ case Opt_sec:
+ ret = nfs_parse_security_flavors(fc, param);
+ if (ret < 0)
+ return ret;
+ break;
+
+ case Opt_proto:
+ protofamily = AF_INET;
+ switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
+ case Opt_xprt_udp6:
+ protofamily = AF_INET6;
+ /* fall through */
+ case Opt_xprt_udp:
+ ctx->flags &= ~NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+ break;
+ case Opt_xprt_tcp6:
+ protofamily = AF_INET6;
+ /* fall through */
+ case Opt_xprt_tcp:
+ ctx->flags |= NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ break;
+ case Opt_xprt_rdma6:
+ protofamily = AF_INET6;
+ /* fall through */
+ case Opt_xprt_rdma:
+ /* vector side protocols to TCP */
+ ctx->flags |= NFS_MOUNT_TCP;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
+ xprt_load_transport(param->string);
+ break;
+ default:
+ return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+ }
+
+ ctx->protofamily = protofamily;
+ break;
+
+ case Opt_mountproto:
+ mountfamily = AF_INET;
+ switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
+ case Opt_xprt_udp6:
+ mountfamily = AF_INET6;
+ /* fall through */
+ case Opt_xprt_udp:
+ ctx->mount_server.protocol = XPRT_TRANSPORT_UDP;
+ break;
+ case Opt_xprt_tcp6:
+ mountfamily = AF_INET6;
+ /* fall through */
+ case Opt_xprt_tcp:
+ ctx->mount_server.protocol = XPRT_TRANSPORT_TCP;
+ break;
+ case Opt_xprt_rdma: /* not used for side protocols */
+ default:
+ return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+ }
+ ctx->mountfamily = mountfamily;
+ break;
+
+ case Opt_addr:
+ len = rpc_pton(fc->net_ns, param->string, param->size,
+ &ctx->nfs_server.address,
+ sizeof(ctx->nfs_server._address));
+ if (len == 0)
+ goto out_invalid_address;
+ ctx->nfs_server.addrlen = len;
+ break;
+ case Opt_clientaddr:
+ kfree(ctx->client_address);
+ ctx->client_address = param->string;
+ param->string = NULL;
+ break;
+ case Opt_mounthost:
+ kfree(ctx->mount_server.hostname);
+ ctx->mount_server.hostname = param->string;
+ param->string = NULL;
+ break;
+ case Opt_mountaddr:
+ len = rpc_pton(fc->net_ns, param->string, param->size,
+ &ctx->mount_server.address,
+ sizeof(ctx->mount_server._address));
+ if (len == 0)
+ goto out_invalid_address;
+ ctx->mount_server.addrlen = len;
+ break;
+ case Opt_nconnect:
+ if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS)
+ goto out_of_bounds;
+ ctx->nfs_server.nconnect = result.uint_32;
+ break;
+ case Opt_lookupcache:
+ switch (result.uint_32) {
+ case Opt_lookupcache_all:
+ ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
+ break;
+ case Opt_lookupcache_positive:
+ ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
+ ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
+ break;
+ case Opt_lookupcache_none:
+ ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
+ case Opt_local_lock:
+ switch (result.uint_32) {
+ case Opt_local_lock_all:
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK |
+ NFS_MOUNT_LOCAL_FCNTL);
+ break;
+ case Opt_local_lock_flock:
+ ctx->flags |= NFS_MOUNT_LOCAL_FLOCK;
+ break;
+ case Opt_local_lock_posix:
+ ctx->flags |= NFS_MOUNT_LOCAL_FCNTL;
+ break;
+ case Opt_local_lock_none:
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
+ NFS_MOUNT_LOCAL_FCNTL);
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
+
+ /*
+ * Special options
+ */
+ case Opt_sloppy:
+ ctx->sloppy = true;
+ dfprintk(MOUNT, "NFS: relaxing parsing rules\n");
+ break;
+ }
+
+ return 0;
+
+out_invalid_value:
+ return nfs_invalf(fc, "NFS: Bad mount option value specified");
+out_invalid_address:
+ return nfs_invalf(fc, "NFS: Bad IP address specified");
+out_of_bounds:
+ return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key);
+}
+
+/*
+ * Split fc->source into "hostname:export_path".
+ *
+ * The leftmost colon demarks the split between the server's hostname
+ * and the export path. If the hostname starts with a left square
+ * bracket, then it may contain colons.
+ *
+ * Note: caller frees hostname and export path, even on error.
+ */
+static int nfs_parse_source(struct fs_context *fc,
+ size_t maxnamlen, size_t maxpathlen)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ const char *dev_name = fc->source;
+ size_t len;
+ const char *end;
+
+ if (unlikely(!dev_name || !*dev_name)) {
+ dfprintk(MOUNT, "NFS: device name not specified\n");
+ return -EINVAL;
+ }
+
+ /* Is the host name protected with square brakcets? */
+ if (*dev_name == '[') {
+ end = strchr(++dev_name, ']');
+ if (end == NULL || end[1] != ':')
+ goto out_bad_devname;
+
+ len = end - dev_name;
+ end++;
+ } else {
+ const char *comma;
+
+ end = strchr(dev_name, ':');
+ if (end == NULL)
+ goto out_bad_devname;
+ len = end - dev_name;
+
+ /* kill possible hostname list: not supported */
+ comma = memchr(dev_name, ',', len);
+ if (comma)
+ len = comma - dev_name;
+ }
+
+ if (len > maxnamlen)
+ goto out_hostname;
+
+ /* N.B. caller will free nfs_server.hostname in all cases */
+ ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL);
+ if (!ctx->nfs_server.hostname)
+ goto out_nomem;
+ len = strlen(++end);
+ if (len > maxpathlen)
+ goto out_path;
+ ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL);
+ if (!ctx->nfs_server.export_path)
+ goto out_nomem;
+
+ dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path);
+ return 0;
+
+out_bad_devname:
+ return nfs_invalf(fc, "NFS: device name not in host:path format");
+out_nomem:
+ nfs_errorf(fc, "NFS: not enough memory to parse device name");
+ return -ENOMEM;
+out_hostname:
+ nfs_errorf(fc, "NFS: server hostname too long");
+ return -ENAMETOOLONG;
+out_path:
+ nfs_errorf(fc, "NFS: export pathname too long");
+ return -ENAMETOOLONG;
+}
+
+static inline bool is_remount_fc(struct fs_context *fc)
+{
+ return fc->root != NULL;
+}
+
+/*
+ * Parse monolithic NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
+ *
+ * For option strings, user space handles the following behaviors:
+ *
+ * + DNS: mapping server host name to IP address ("addr=" option)
+ *
+ * + failure mode: how to behave if a mount request can't be handled
+ * immediately ("fg/bg" option)
+ *
+ * + retry: how often to retry a mount request ("retry=" option)
+ *
+ * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
+ * mountproto=tcp after mountproto=udp, and so on
+ */
+static int nfs23_parse_monolithic(struct fs_context *fc,
+ struct nfs_mount_data *data)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct nfs_fh *mntfh = ctx->mntfh;
+ struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
+
+ if (data == NULL)
+ goto out_no_data;
+
+ ctx->version = NFS_DEFAULT_VERSION;
+ switch (data->version) {
+ case 1:
+ data->namlen = 0; /* fall through */
+ case 2:
+ data->bsize = 0; /* fall through */
+ case 3:
+ if (data->flags & NFS_MOUNT_VER3)
+ goto out_no_v3;
+ data->root.size = NFS2_FHSIZE;
+ memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+ /* Turn off security negotiation */
+ extra_flags |= NFS_MOUNT_SECFLAVOUR;
+ /* fall through */
+ case 4:
+ if (data->flags & NFS_MOUNT_SECFLAVOUR)
+ goto out_no_sec;
+ /* fall through */
+ case 5:
+ memset(data->context, 0, sizeof(data->context));
+ /* fall through */
+ case 6:
+ if (data->flags & NFS_MOUNT_VER3) {
+ if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
+ goto out_invalid_fh;
+ mntfh->size = data->root.size;
+ ctx->version = 3;
+ } else {
+ mntfh->size = NFS2_FHSIZE;
+ ctx->version = 2;
+ }
+
+
+ memcpy(mntfh->data, data->root.data, mntfh->size);
+ if (mntfh->size < sizeof(mntfh->data))
+ memset(mntfh->data + mntfh->size, 0,
+ sizeof(mntfh->data) - mntfh->size);
+
+ /*
+ * Translate to nfs_fs_context, which nfs_fill_super
+ * can deal with.
+ */
+ ctx->flags = data->flags & NFS_MOUNT_FLAGMASK;
+ ctx->flags |= extra_flags;
+ ctx->rsize = data->rsize;
+ ctx->wsize = data->wsize;
+ ctx->timeo = data->timeo;
+ ctx->retrans = data->retrans;
+ ctx->acregmin = data->acregmin;
+ ctx->acregmax = data->acregmax;
+ ctx->acdirmin = data->acdirmin;
+ ctx->acdirmax = data->acdirmax;
+ ctx->need_mount = false;
+
+ memcpy(sap, &data->addr, sizeof(data->addr));
+ ctx->nfs_server.addrlen = sizeof(data->addr);
+ ctx->nfs_server.port = ntohs(data->addr.sin_port);
+ if (sap->sa_family != AF_INET ||
+ !nfs_verify_server_address(sap))
+ goto out_no_address;
+
+ if (!(data->flags & NFS_MOUNT_TCP))
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+ /* N.B. caller will free nfs_server.hostname in all cases */
+ ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
+ if (!ctx->nfs_server.hostname)
+ goto out_nomem;
+
+ ctx->namlen = data->namlen;
+ ctx->bsize = data->bsize;
+
+ if (data->flags & NFS_MOUNT_SECFLAVOUR)
+ ctx->selected_flavor = data->pseudoflavor;
+ else
+ ctx->selected_flavor = RPC_AUTH_UNIX;
+
+ if (!(data->flags & NFS_MOUNT_NONLM))
+ ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
+ NFS_MOUNT_LOCAL_FCNTL);
+ else
+ ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK|
+ NFS_MOUNT_LOCAL_FCNTL);
+
+ /*
+ * The legacy version 6 binary mount data from userspace has a
+ * field used only to transport selinux information into the
+ * the kernel. To continue to support that functionality we
+ * have a touch of selinux knowledge here in the NFS code. The
+ * userspace code converted context=blah to just blah so we are
+ * converting back to the full string selinux understands.
+ */
+ if (data->context[0]){
+#ifdef CONFIG_SECURITY_SELINUX
+ int ret;
+
+ data->context[NFS_MAX_CONTEXT_LEN] = '\0';
+ ret = vfs_parse_fs_string(fc, "context",
+ data->context, strlen(data->context));
+ if (ret < 0)
+ return ret;
+#else
+ return -EINVAL;
+#endif
+ }
+
+ break;
+ default:
+ goto generic;
+ }
+
+ ctx->skip_reconfig_option_check = true;
+ return 0;
+
+generic:
+ return generic_parse_monolithic(fc, data);
+
+out_no_data:
+ if (is_remount_fc(fc)) {
+ ctx->skip_reconfig_option_check = true;
+ return 0;
+ }
+ return nfs_invalf(fc, "NFS: mount program didn't pass any mount data");
+
+out_no_v3:
+ return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3");
+
+out_no_sec:
+ return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS");
+
+out_nomem:
+ dfprintk(MOUNT, "NFS: not enough memory to handle mount options");
+ return -ENOMEM;
+
+out_no_address:
+ return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
+
+out_invalid_fh:
+ return nfs_invalf(fc, "NFS: invalid root filehandle");
+}
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * Validate NFSv4 mount options
+ */
+static int nfs4_parse_monolithic(struct fs_context *fc,
+ struct nfs4_mount_data *data)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ char *c;
+
+ if (data == NULL)
+ goto out_no_data;
+
+ ctx->version = 4;
+
+ switch (data->version) {
+ case 1:
+ if (data->host_addrlen > sizeof(ctx->nfs_server.address))
+ goto out_no_address;
+ if (data->host_addrlen == 0)
+ goto out_no_address;
+ ctx->nfs_server.addrlen = data->host_addrlen;
+ if (copy_from_user(sap, data->host_addr, data->host_addrlen))
+ return -EFAULT;
+ if (!nfs_verify_server_address(sap))
+ goto out_no_address;
+ ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+
+ if (data->auth_flavourlen) {
+ rpc_authflavor_t pseudoflavor;
+ if (data->auth_flavourlen > 1)
+ goto out_inval_auth;
+ if (copy_from_user(&pseudoflavor,
+ data->auth_flavours,
+ sizeof(pseudoflavor)))
+ return -EFAULT;
+ ctx->selected_flavor = pseudoflavor;
+ } else
+ ctx->selected_flavor = RPC_AUTH_UNIX;
+
+ c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ ctx->nfs_server.hostname = c;
+
+ c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ ctx->nfs_server.export_path = c;
+ dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
+
+ c = strndup_user(data->client_addr.data, 16);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ ctx->client_address = c;
+
+ /*
+ * Translate to nfs_fs_context, which nfs_fill_super
+ * can deal with.
+ */
+
+ ctx->flags = data->flags & NFS4_MOUNT_FLAGMASK;
+ ctx->rsize = data->rsize;
+ ctx->wsize = data->wsize;
+ ctx->timeo = data->timeo;
+ ctx->retrans = data->retrans;
+ ctx->acregmin = data->acregmin;
+ ctx->acregmax = data->acregmax;
+ ctx->acdirmin = data->acdirmin;
+ ctx->acdirmax = data->acdirmax;
+ ctx->nfs_server.protocol = data->proto;
+ nfs_validate_transport_protocol(ctx);
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+ goto out_invalid_transport_udp;
+
+ break;
+ default:
+ goto generic;
+ }
+
+ ctx->skip_reconfig_option_check = true;
+ return 0;
+
+generic:
+ return generic_parse_monolithic(fc, data);
+
+out_no_data:
+ if (is_remount_fc(fc)) {
+ ctx->skip_reconfig_option_check = true;
+ return 0;
+ }
+ return nfs_invalf(fc, "NFS4: mount program didn't pass any mount data");
+
+out_inval_auth:
+ return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d",
+ data->auth_flavourlen);
+
+out_no_address:
+ return nfs_invalf(fc, "NFS4: mount program didn't pass remote address");
+
+out_invalid_transport_udp:
+ return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp");
+}
+#endif
+
+/*
+ * Parse a monolithic block of data from sys_mount().
+ */
+static int nfs_fs_context_parse_monolithic(struct fs_context *fc,
+ void *data)
+{
+ if (fc->fs_type == &nfs_fs_type)
+ return nfs23_parse_monolithic(fc, data);
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+ if (fc->fs_type == &nfs4_fs_type)
+ return nfs4_parse_monolithic(fc, data);
+#endif
+
+ return nfs_invalf(fc, "NFS: Unsupported monolithic data version");
+}
+
+/*
+ * Validate the preparsed information in the config.
+ */
+static int nfs_fs_context_validate(struct fs_context *fc)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct nfs_subversion *nfs_mod;
+ struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ int max_namelen = PAGE_SIZE;
+ int max_pathlen = NFS_MAXPATHLEN;
+ int port = 0;
+ int ret;
+
+ if (!fc->source)
+ goto out_no_device_name;
+
+ /* Check for sanity first. */
+ if (ctx->minorversion && ctx->version != 4)
+ goto out_minorversion_mismatch;
+
+ if (ctx->options & NFS_OPTION_MIGRATION &&
+ (ctx->version != 4 || ctx->minorversion != 0))
+ goto out_migration_misuse;
+
+ /* Verify that any proto=/mountproto= options match the address
+ * families in the addr=/mountaddr= options.
+ */
+ if (ctx->protofamily != AF_UNSPEC &&
+ ctx->protofamily != ctx->nfs_server.address.sa_family)
+ goto out_proto_mismatch;
+
+ if (ctx->mountfamily != AF_UNSPEC) {
+ if (ctx->mount_server.addrlen) {
+ if (ctx->mountfamily != ctx->mount_server.address.sa_family)
+ goto out_mountproto_mismatch;
+ } else {
+ if (ctx->mountfamily != ctx->nfs_server.address.sa_family)
+ goto out_mountproto_mismatch;
+ }
+ }
+
+ if (!nfs_verify_server_address(sap))
+ goto out_no_address;
+
+ if (ctx->version == 4) {
+ if (IS_ENABLED(CONFIG_NFS_V4)) {
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+ port = NFS_RDMA_PORT;
+ else
+ port = NFS_PORT;
+ max_namelen = NFS4_MAXNAMLEN;
+ max_pathlen = NFS4_MAXPATHLEN;
+ nfs_validate_transport_protocol(ctx);
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+ goto out_invalid_transport_udp;
+ ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL |
+ NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK |
+ NFS_MOUNT_LOCAL_FCNTL);
+ } else {
+ goto out_v4_not_compiled;
+ }
+ } else {
+ nfs_set_mount_transport_protocol(ctx);
+#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+ goto out_invalid_transport_udp;
+#endif
+ if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
+ port = NFS_RDMA_PORT;
+ }
+
+ nfs_set_port(sap, &ctx->nfs_server.port, port);
+
+ ret = nfs_parse_source(fc, max_namelen, max_pathlen);
+ if (ret < 0)
+ return ret;
+
+ /* Load the NFS protocol module if we haven't done so yet */
+ if (!ctx->nfs_mod) {
+ nfs_mod = get_nfs_version(ctx->version);
+ if (IS_ERR(nfs_mod)) {
+ ret = PTR_ERR(nfs_mod);
+ goto out_version_unavailable;
+ }
+ ctx->nfs_mod = nfs_mod;
+ }
+ return 0;
+
+out_no_device_name:
+ return nfs_invalf(fc, "NFS: Device name not specified");
+out_v4_not_compiled:
+ nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel");
+ return -EPROTONOSUPPORT;
+out_invalid_transport_udp:
+ return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp");
+out_no_address:
+ return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
+out_mountproto_mismatch:
+ return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option");
+out_proto_mismatch:
+ return nfs_invalf(fc, "NFS: Server address does not match proto= option");
+out_minorversion_mismatch:
+ return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u",
+ ctx->version, ctx->minorversion);
+out_migration_misuse:
+ return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version");
+out_version_unavailable:
+ nfs_errorf(fc, "NFS: Version unavailable");
+ return ret;
+}
+
+/*
+ * Create an NFS superblock by the appropriate method.
+ */
+static int nfs_get_tree(struct fs_context *fc)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ int err = nfs_fs_context_validate(fc);
+
+ if (err)
+ return err;
+ if (!ctx->internal)
+ return ctx->nfs_mod->rpc_ops->try_get_tree(fc);
+ else
+ return nfs_get_tree_common(fc);
+}
+
+/*
+ * Handle duplication of a configuration. The caller copied *src into *sc, but
+ * it can't deal with resource pointers in the filesystem context, so we have
+ * to do that. We need to clear pointers, copy data or get extra refs as
+ * appropriate.
+ */
+static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
+{
+ struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx;
+
+ ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->mntfh = nfs_alloc_fhandle();
+ if (!ctx->mntfh) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ nfs_copy_fh(ctx->mntfh, src->mntfh);
+
+ __module_get(ctx->nfs_mod->owner);
+ ctx->client_address = NULL;
+ ctx->mount_server.hostname = NULL;
+ ctx->nfs_server.export_path = NULL;
+ ctx->nfs_server.hostname = NULL;
+ ctx->fscache_uniq = NULL;
+ ctx->clone_data.fattr = NULL;
+ fc->fs_private = ctx;
+ return 0;
+}
+
+static void nfs_fs_context_free(struct fs_context *fc)
+{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+
+ if (ctx) {
+ if (ctx->server)
+ nfs_free_server(ctx->server);
+ if (ctx->nfs_mod)
+ put_nfs_version(ctx->nfs_mod);
+ kfree(ctx->client_address);
+ kfree(ctx->mount_server.hostname);
+ kfree(ctx->nfs_server.export_path);
+ kfree(ctx->nfs_server.hostname);
+ kfree(ctx->fscache_uniq);
+ nfs_free_fhandle(ctx->mntfh);
+ nfs_free_fattr(ctx->clone_data.fattr);
+ kfree(ctx);
+ }
+}
+
+static const struct fs_context_operations nfs_fs_context_ops = {
+ .free = nfs_fs_context_free,
+ .dup = nfs_fs_context_dup,
+ .parse_param = nfs_fs_context_parse_param,
+ .parse_monolithic = nfs_fs_context_parse_monolithic,
+ .get_tree = nfs_get_tree,
+ .reconfigure = nfs_reconfigure,
+};
+
+/*
+ * Prepare superblock configuration. We use the namespaces attached to the
+ * context. This may be the current process's namespaces, or it may be a
+ * container's namespaces.
+ */
+static int nfs_init_fs_context(struct fs_context *fc)
+{
+ struct nfs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL);
+ if (unlikely(!ctx))
+ return -ENOMEM;
+
+ ctx->mntfh = nfs_alloc_fhandle();
+ if (unlikely(!ctx->mntfh)) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+
+ ctx->protofamily = AF_UNSPEC;
+ ctx->mountfamily = AF_UNSPEC;
+ ctx->mount_server.port = NFS_UNSPEC_PORT;
+
+ if (fc->root) {
+ /* reconfigure, start with the current config */
+ struct nfs_server *nfss = fc->root->d_sb->s_fs_info;
+ struct net *net = nfss->nfs_client->cl_net;
+
+ ctx->flags = nfss->flags;
+ ctx->rsize = nfss->rsize;
+ ctx->wsize = nfss->wsize;
+ ctx->retrans = nfss->client->cl_timeout->to_retries;
+ ctx->selected_flavor = nfss->client->cl_auth->au_flavor;
+ ctx->acregmin = nfss->acregmin / HZ;
+ ctx->acregmax = nfss->acregmax / HZ;
+ ctx->acdirmin = nfss->acdirmin / HZ;
+ ctx->acdirmax = nfss->acdirmax / HZ;
+ ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
+ ctx->nfs_server.port = nfss->port;
+ ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
+ ctx->version = nfss->nfs_client->rpc_ops->version;
+ ctx->minorversion = nfss->nfs_client->cl_minorversion;
+
+ memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr,
+ ctx->nfs_server.addrlen);
+
+ if (fc->net_ns != net) {
+ put_net(fc->net_ns);
+ fc->net_ns = get_net(net);
+ }
+
+ ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod;
+ __module_get(ctx->nfs_mod->owner);
+ } else {
+ /* defaults */
+ ctx->timeo = NFS_UNSPEC_TIMEO;
+ ctx->retrans = NFS_UNSPEC_RETRANS;
+ ctx->acregmin = NFS_DEF_ACREGMIN;
+ ctx->acregmax = NFS_DEF_ACREGMAX;
+ ctx->acdirmin = NFS_DEF_ACDIRMIN;
+ ctx->acdirmax = NFS_DEF_ACDIRMAX;
+ ctx->nfs_server.port = NFS_UNSPEC_PORT;
+ ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
+ ctx->selected_flavor = RPC_AUTH_MAXFLAVOR;
+ ctx->minorversion = 0;
+ ctx->need_mount = true;
+ }
+ fc->fs_private = ctx;
+ fc->ops = &nfs_fs_context_ops;
+ return 0;
+}
+
+struct file_system_type nfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs",
+ .init_fs_context = nfs_init_fs_context,
+ .parameters = nfs_fs_parameters,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+};
+MODULE_ALIAS_FS("nfs");
+EXPORT_SYMBOL_GPL(nfs_fs_type);
+
+#if IS_ENABLED(CONFIG_NFS_V4)
+struct file_system_type nfs4_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .init_fs_context = nfs_init_fs_context,
+ .parameters = nfs_fs_parameters,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+};
+MODULE_ALIAS_FS("nfs4");
+MODULE_ALIAS("nfs4");
+EXPORT_SYMBOL_GPL(nfs4_fs_type);
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 7def925d3af5..52270bfac120 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -128,7 +128,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
return;
key->nfs_client = nfss->nfs_client;
- key->key.super.s_flags = sb->s_flags & NFS_MS_MASK;
+ key->key.super.s_flags = sb->s_flags & NFS_SB_MASK;
key->key.nfs_server.flags = nfss->flags;
key->key.nfs_server.rsize = nfss->rsize;
key->key.nfs_server.wsize = nfss->wsize;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 878c4c5982d9..b012c2668a1f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -64,66 +64,71 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
/*
* get an NFS2/NFS3 root dentry from the root filehandle
*/
-struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
- const char *devname)
+int nfs_get_root(struct super_block *s, struct fs_context *fc)
{
- struct nfs_server *server = NFS_SB(sb);
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct nfs_server *server = NFS_SB(s);
struct nfs_fsinfo fsinfo;
- struct dentry *ret;
+ struct dentry *root;
struct inode *inode;
- void *name = kstrdup(devname, GFP_KERNEL);
- int error;
+ char *name;
+ int error = -ENOMEM;
+ name = kstrdup(fc->source, GFP_KERNEL);
if (!name)
- return ERR_PTR(-ENOMEM);
+ goto out;
/* get the actual root for this mount */
fsinfo.fattr = nfs_alloc_fattr();
- if (fsinfo.fattr == NULL) {
- kfree(name);
- return ERR_PTR(-ENOMEM);
- }
+ if (fsinfo.fattr == NULL)
+ goto out_name;
- error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+ error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo);
if (error < 0) {
dprintk("nfs_get_root: getattr error = %d\n", -error);
- ret = ERR_PTR(error);
- goto out;
+ nfs_errorf(fc, "NFS: Couldn't getattr on root");
+ goto out_fattr;
}
- inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL);
+ inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL);
if (IS_ERR(inode)) {
dprintk("nfs_get_root: get root inode failed\n");
- ret = ERR_CAST(inode);
- goto out;
+ error = PTR_ERR(inode);
+ nfs_errorf(fc, "NFS: Couldn't get root inode");
+ goto out_fattr;
}
- error = nfs_superblock_set_dummy_root(sb, inode);
- if (error != 0) {
- ret = ERR_PTR(error);
- goto out;
- }
+ error = nfs_superblock_set_dummy_root(s, inode);
+ if (error != 0)
+ goto out_fattr;
/* root dentries normally start off anonymous and get spliced in later
* if the dentry tree reaches them; however if the dentry already
* exists, we'll pick it up at this point and use it as the root
*/
- ret = d_obtain_root(inode);
- if (IS_ERR(ret)) {
+ root = d_obtain_root(inode);
+ if (IS_ERR(root)) {
dprintk("nfs_get_root: get root dentry failed\n");
- goto out;
+ error = PTR_ERR(root);
+ nfs_errorf(fc, "NFS: Couldn't get root dentry");
+ goto out_fattr;
}
- security_d_instantiate(ret, inode);
- spin_lock(&ret->d_lock);
- if (IS_ROOT(ret) && !ret->d_fsdata &&
- !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
- ret->d_fsdata = name;
+ security_d_instantiate(root, inode);
+ spin_lock(&root->d_lock);
+ if (IS_ROOT(root) && !root->d_fsdata &&
+ !(root->d_flags & DCACHE_NFSFS_RENAMED)) {
+ root->d_fsdata = name;
name = NULL;
}
- spin_unlock(&ret->d_lock);
-out:
- kfree(name);
+ spin_unlock(&root->d_lock);
+ fc->root = root;
+ error = 0;
+
+out_fattr:
nfs_free_fattr(fsinfo.fattr);
- return ret;
+out_name:
+ kfree(name);
+out:
+ return error;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b0b4b9f303fd..1309e6f47f3d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1061,7 +1061,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct
rcu_read_lock();
list_for_each_entry_rcu(pos, &nfsi->open_files, list) {
- if (cred != NULL && pos->cred != cred)
+ if (cred != NULL && cred_fscmp(pos->cred, cred) != 0)
continue;
if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
continue;
@@ -1156,7 +1156,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(inode), status);
- if (status == -ESTALE) {
+ switch (status) {
+ case -ETIMEDOUT:
+ /* A soft timeout occurred. Use cached information? */
+ if (server->flags & NFS_MOUNT_SOFTREVAL)
+ status = 0;
+ break;
+ case -ESTALE:
nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 24a65da58aa9..f80c47d5ff27 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,17 +4,19 @@
*/
#include "nfs4_fs.h"
-#include <linux/mount.h>
+#include <linux/fs_context.h>
#include <linux/security.h>
#include <linux/crc32.h>
+#include <linux/sunrpc/addr.h>
#include <linux/nfs_page.h>
#include <linux/wait_bit.h>
-#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
+#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
extern const struct export_operations nfs_export_ops;
struct nfs_string;
+struct nfs_pageio_descriptor;
static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
{
@@ -31,17 +33,14 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr)
return 1;
}
-struct nfs_clone_mount {
- const struct super_block *sb;
- const struct dentry *dentry;
- struct nfs_fh *fh;
- struct nfs_fattr *fattr;
- char *hostname;
- char *mnt_path;
- struct sockaddr *addr;
- size_t addrlen;
- rpc_authflavor_t authflavor;
-};
+static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
+{
+ if (!(NFS_SB(dentry->d_sb)->flags & NFS_MOUNT_SOFTREVAL))
+ return false;
+ if (!d_is_positive(dentry) || !NFS_FH(d_inode(dentry))->size)
+ return false;
+ return true;
+}
/*
* Note: RFC 1813 doesn't limit the number of auth flavors that
@@ -82,12 +81,16 @@ struct nfs_client_initdata {
/*
* In-kernel mount arguments
*/
-struct nfs_parsed_mount_data {
- int flags;
+struct nfs_fs_context {
+ bool internal;
+ bool skip_reconfig_option_check;
+ bool need_mount;
+ bool sloppy;
+ unsigned int flags; /* NFS{,4}_MOUNT_* flags */
unsigned int rsize, wsize;
unsigned int timeo, retrans;
- unsigned int acregmin, acregmax,
- acdirmin, acdirmax;
+ unsigned int acregmin, acregmax;
+ unsigned int acdirmin, acdirmax;
unsigned int namlen;
unsigned int options;
unsigned int bsize;
@@ -97,10 +100,14 @@ struct nfs_parsed_mount_data {
unsigned int version;
unsigned int minorversion;
char *fscache_uniq;
- bool need_mount;
+ unsigned short protofamily;
+ unsigned short mountfamily;
struct {
- struct sockaddr_storage address;
+ union {
+ struct sockaddr address;
+ struct sockaddr_storage _address;
+ };
size_t addrlen;
char *hostname;
u32 version;
@@ -109,19 +116,41 @@ struct nfs_parsed_mount_data {
} mount_server;
struct {
- struct sockaddr_storage address;
+ union {
+ struct sockaddr address;
+ struct sockaddr_storage _address;
+ };
size_t addrlen;
char *hostname;
char *export_path;
int port;
unsigned short protocol;
unsigned short nconnect;
+ unsigned short export_path_len;
} nfs_server;
- void *lsm_opts;
- struct net *net;
+ struct nfs_fh *mntfh;
+ struct nfs_server *server;
+ struct nfs_subversion *nfs_mod;
+
+ /* Information for a cloned mount. */
+ struct nfs_clone_mount {
+ struct super_block *sb;
+ struct dentry *dentry;
+ struct nfs_fattr *fattr;
+ unsigned int inherited_bsize;
+ } clone_data;
};
+#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__)
+#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__)
+#define nfs_warnf(fc, fmt, ...) warnf(fc, fmt, ## __VA_ARGS__)
+
+static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc)
+{
+ return fc->fs_private;
+}
+
/* mount_clnt.c */
struct nfs_mount_request {
struct sockaddr *sap;
@@ -137,14 +166,6 @@ struct nfs_mount_request {
struct net *net;
};
-struct nfs_mount_info {
- void (*fill_super)(struct super_block *, struct nfs_mount_info *);
- int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
- struct nfs_parsed_mount_data *parsed;
- struct nfs_clone_mount *cloned;
- struct nfs_fh *mntfh;
-};
-
extern int nfs_mount(struct nfs_mount_request *info);
extern void nfs_umount(const struct nfs_mount_request *info);
@@ -170,13 +191,9 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
extern struct nfs_client *
nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
struct nfs4_sessionid *, u32);
-extern struct nfs_server *nfs_create_server(struct nfs_mount_info *,
- struct nfs_subversion *);
-extern struct nfs_server *nfs4_create_server(
- struct nfs_mount_info *,
- struct nfs_subversion *);
-extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
- struct nfs_fh *);
+extern struct nfs_server *nfs_create_server(struct fs_context *);
+extern struct nfs_server *nfs4_create_server(struct fs_context *);
+extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
struct sockaddr *sap, size_t salen,
struct net *net);
@@ -227,7 +244,9 @@ static inline void nfs_fs_proc_exit(void)
extern const struct svc_version nfs4_callback_version1;
extern const struct svc_version nfs4_callback_version4;
-struct nfs_pageio_descriptor;
+/* fs_context.c */
+extern struct file_system_type nfs_fs_type;
+
/* pagelist.c */
extern int __init nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void);
@@ -387,23 +406,10 @@ extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode);
/* super.c */
extern const struct super_operations nfs_sops;
-extern struct file_system_type nfs_fs_type;
-extern struct file_system_type nfs_xdev_fs_type;
-#if IS_ENABLED(CONFIG_NFS_V4)
-extern struct file_system_type nfs4_referral_fs_type;
-#endif
bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t);
-struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
- struct nfs_subversion *);
-int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
-int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
-struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
- struct nfs_mount_info *, struct nfs_subversion *);
-struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *);
-struct dentry * nfs_xdev_mount_common(struct file_system_type *, int,
- const char *, struct nfs_mount_info *);
+int nfs_try_get_tree(struct fs_context *);
+int nfs_get_tree_common(struct fs_context *);
void nfs_kill_super(struct super_block *);
-void nfs_fill_super(struct super_block *, struct nfs_mount_info *);
extern struct rpc_stat nfs_rpcstat;
@@ -430,18 +436,12 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
extern char *nfs_path(char **p, struct dentry *dentry,
char *buffer, ssize_t buflen, unsigned flags);
extern struct vfsmount *nfs_d_automount(struct path *path);
-struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *,
- struct nfs_fh *, struct nfs_fattr *);
-struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
- struct nfs_fattr *, rpc_authflavor_t);
+int nfs_submount(struct fs_context *, struct nfs_server *);
+int nfs_do_submount(struct fs_context *);
/* getroot.c */
-extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
- const char *);
+extern int nfs_get_root(struct super_block *s, struct fs_context *fc);
#if IS_ENABLED(CONFIG_NFS_V4)
-extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
- const char *);
-
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool);
#endif
@@ -460,7 +460,7 @@ int nfs_show_options(struct seq_file *, struct dentry *);
int nfs_show_devname(struct seq_file *, struct dentry *);
int nfs_show_path(struct seq_file *, struct dentry *);
int nfs_show_stats(struct seq_file *, struct dentry *);
-int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
+int nfs_reconfigure(struct fs_context *);
/* write.c */
extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -706,9 +706,9 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
}
/*
- * Convert a struct timespec into a 64-bit change attribute
+ * Convert a struct timespec64 into a 64-bit change attribute
*
- * This does approximately the same thing as timespec_to_ns(),
+ * This does approximately the same thing as timespec64_to_ns(),
* but for calculation efficiency, we multiply the seconds by
* 1024*1024*1024.
*/
@@ -777,3 +777,16 @@ static inline bool nfs_error_is_fatal_on_server(int err)
}
return nfs_error_is_fatal(err);
}
+
+/*
+ * Select between a default port value and a user-specified port value.
+ * If a zero value is set, then autobind will be used.
+ */
+static inline void nfs_set_port(struct sockaddr *sap, int *port,
+ const unsigned short default_port)
+{
+ if (*port == NFS_UNSPEC_PORT)
+ *port = default_port;
+
+ rpc_set_port(sap, *port);
+}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index cb7c10e9721e..35c8cb2d7637 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -29,9 +29,7 @@
*/
#define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN))
#define MNT_status_sz (1)
-#define MNT_fhs_status_sz (1)
#define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE)
-#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE))
#define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS)
/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 5e0e9d29f5c5..ad6077404947 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -19,6 +19,7 @@
#include <linux/vfs.h>
#include <linux/sunrpc/gss_api.h>
#include "internal.h"
+#include "nfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -139,34 +140,65 @@ EXPORT_SYMBOL_GPL(nfs_path);
*/
struct vfsmount *nfs_d_automount(struct path *path)
{
- struct vfsmount *mnt;
+ struct nfs_fs_context *ctx;
+ struct fs_context *fc;
+ struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct nfs_server *server = NFS_SERVER(d_inode(path->dentry));
- struct nfs_fh *fh = NULL;
- struct nfs_fattr *fattr = NULL;
+ struct nfs_client *client = server->nfs_client;
+ int ret;
if (IS_ROOT(path->dentry))
return ERR_PTR(-ESTALE);
- mnt = ERR_PTR(-ENOMEM);
- fh = nfs_alloc_fhandle();
- fattr = nfs_alloc_fattr();
- if (fh == NULL || fattr == NULL)
- goto out;
+ /* Open a new filesystem context, transferring parameters from the
+ * parent superblock, including the network namespace.
+ */
+ fc = fs_context_for_submount(&nfs_fs_type, path->dentry);
+ if (IS_ERR(fc))
+ return ERR_CAST(fc);
- mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
+ ctx = nfs_fc2context(fc);
+ ctx->clone_data.dentry = path->dentry;
+ ctx->clone_data.sb = path->dentry->d_sb;
+ ctx->clone_data.fattr = nfs_alloc_fattr();
+ if (!ctx->clone_data.fattr)
+ goto out_fc;
+
+ if (fc->net_ns != client->cl_net) {
+ put_net(fc->net_ns);
+ fc->net_ns = get_net(client->cl_net);
+ }
+
+ /* for submounts we want the same server; referrals will reassign */
+ memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen);
+ ctx->nfs_server.addrlen = client->cl_addrlen;
+ ctx->nfs_server.port = server->port;
+
+ ctx->version = client->rpc_ops->version;
+ ctx->minorversion = client->cl_minorversion;
+ ctx->nfs_mod = client->cl_nfs_mod;
+ __module_get(ctx->nfs_mod->owner);
+
+ ret = client->rpc_ops->submount(fc, server);
+ if (ret < 0) {
+ mnt = ERR_PTR(ret);
+ goto out_fc;
+ }
+
+ up_write(&fc->root->d_sb->s_umount);
+ mnt = vfs_create_mount(fc);
if (IS_ERR(mnt))
- goto out;
+ goto out_fc;
if (nfs_mountpoint_expiry_timeout < 0)
- goto out;
+ goto out_fc;
mntget(mnt); /* prevent immediate expiration */
mnt_set_expiry(mnt, &nfs_automount_list);
schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
-out:
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fh);
+out_fc:
+ put_fs_context(fc);
return mnt;
}
@@ -213,16 +245,6 @@ void nfs_release_automount_timer(void)
cancel_delayed_work(&nfs_automount_task);
}
-/*
- * Clone a mountpoint of the appropriate type
- */
-static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
- const char *devname,
- struct nfs_clone_mount *mountdata)
-{
- return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata);
-}
-
/**
* nfs_do_submount - set up mountpoint when crossing a filesystem boundary
* @dentry: parent directory
@@ -231,46 +253,62 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
* @authflavor: security flavor to use when performing the mount
*
*/
-struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
- struct nfs_fattr *fattr, rpc_authflavor_t authflavor)
+int nfs_do_submount(struct fs_context *fc)
{
- struct nfs_clone_mount mountdata = {
- .sb = dentry->d_sb,
- .dentry = dentry,
- .fh = fh,
- .fattr = fattr,
- .authflavor = authflavor,
- };
- struct vfsmount *mnt;
- char *page = (char *) __get_free_page(GFP_USER);
- char *devname;
-
- if (page == NULL)
- return ERR_PTR(-ENOMEM);
-
- devname = nfs_devname(dentry, page, PAGE_SIZE);
- if (IS_ERR(devname))
- mnt = ERR_CAST(devname);
- else
- mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
-
- free_page((unsigned long)page);
- return mnt;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct dentry *dentry = ctx->clone_data.dentry;
+ struct nfs_server *server;
+ char *buffer, *p;
+ int ret;
+
+ /* create a new volume representation */
+ server = ctx->nfs_mod->rpc_ops->clone_server(NFS_SB(ctx->clone_data.sb),
+ ctx->mntfh,
+ ctx->clone_data.fattr,
+ ctx->selected_flavor);
+
+ if (IS_ERR(server))
+ return PTR_ERR(server);
+
+ ctx->server = server;
+
+ buffer = kmalloc(4096, GFP_USER);
+ if (!buffer)
+ return -ENOMEM;
+
+ ctx->internal = true;
+ ctx->clone_data.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits;
+
+ p = nfs_devname(dentry, buffer, 4096);
+ if (IS_ERR(p)) {
+ nfs_errorf(fc, "NFS: Couldn't determine submount pathname");
+ ret = PTR_ERR(p);
+ } else {
+ ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p);
+ if (!ret)
+ ret = vfs_get_tree(fc);
+ }
+ kfree(buffer);
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_do_submount);
-struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
- struct nfs_fh *fh, struct nfs_fattr *fattr)
+int nfs_submount(struct fs_context *fc, struct nfs_server *server)
{
- int err;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct dentry *dentry = ctx->clone_data.dentry;
struct dentry *parent = dget_parent(dentry);
+ int err;
/* Look it up again to get its attributes */
- err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, fh, fattr, NULL);
+ err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry,
+ ctx->mntfh, ctx->clone_data.fattr,
+ NULL);
dput(parent);
if (err != 0)
- return ERR_PTR(err);
+ return err;
- return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
+ ctx->selected_flavor = server->client->cl_auth->au_flavor;
+ return nfs_do_submount(fc);
}
EXPORT_SYMBOL_GPL(nfs_submount);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index d94c7abdf25a..f6676af37d5d 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -360,17 +360,17 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr,
else
*p++ = cpu_to_be32(NFS2_SATTR_NOT_SET);
- if (attr->ia_valid & ATTR_ATIME_SET) {
+ if (attr->ia_valid & ATTR_ATIME_SET)
p = xdr_encode_time(p, &attr->ia_atime);
- } else if (attr->ia_valid & ATTR_ATIME) {
+ else if (attr->ia_valid & ATTR_ATIME)
p = xdr_encode_current_server_time(p, &attr->ia_atime);
- } else
+ else
p = xdr_time_not_set(p);
- if (attr->ia_valid & ATTR_MTIME_SET) {
+ if (attr->ia_valid & ATTR_MTIME_SET)
xdr_encode_time(p, &attr->ia_mtime);
- } else if (attr->ia_valid & ATTR_MTIME) {
+ else if (attr->ia_valid & ATTR_MTIME)
xdr_encode_current_server_time(p, &attr->ia_mtime);
- } else
+ else
xdr_time_not_set(p);
}
diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h
index f82e11c4cb56..1b950b66b3bb 100644
--- a/fs/nfs/nfs3_fs.h
+++ b/fs/nfs/nfs3_fs.h
@@ -27,7 +27,7 @@ static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
#endif /* CONFIG_NFS_V3_ACL */
/* nfs3client.c */
-struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *);
+struct nfs_server *nfs3_create_server(struct fs_context *);
struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *,
struct nfs_fattr *, rpc_authflavor_t);
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 223904bc40a7..5601e47360c2 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -46,10 +46,10 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server)
}
#endif
-struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs3_create_server(struct fs_context *fc)
{
- struct nfs_server *server = nfs_create_server(mount_info, nfs_mod);
+ struct nfs_server *server = nfs_create_server(fc);
+
/* Create a client RPC handle for the NFS v3 ACL management interface */
if (!IS_ERR(server))
nfs_init_server_aclclient(server);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 9eb2f1a503ab..a46d1d5d16d8 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -110,10 +110,15 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_resp = fattr,
};
int status;
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+ task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call_sync(server->client, &msg, 0);
+ status = rpc_call_sync(server->client, &msg, task_flags);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -140,23 +145,23 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
nfs_fattr_init(fattr);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status == 0) {
+ nfs_setattr_update_inode(inode, sattr, fattr);
if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
nfs_zap_acl_cache(inode);
- nfs_setattr_update_inode(inode, sattr, fattr);
}
dprintk("NFS reply setattr: %d\n", status);
return status;
}
static int
-nfs3_proc_lookup(struct inode *dir, const struct qstr *name,
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
struct nfs3_diropargs arg = {
.fh = NFS_FH(dir),
- .name = name->name,
- .len = name->len
+ .name = dentry->d_name.name,
+ .len = dentry->d_name.len
};
struct nfs3_diropres res = {
.fh = fhandle,
@@ -168,20 +173,25 @@ nfs3_proc_lookup(struct inode *dir, const struct qstr *name,
.rpc_resp = &res,
};
int status;
+ unsigned short task_flags = 0;
- dprintk("NFS call lookup %s\n", name->name);
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (nfs_lookup_is_soft_revalidate(dentry))
+ task_flags |= RPC_TASK_TIMEOUT;
+
+ dprintk("NFS call lookup %pd2\n", dentry);
res.dir_attr = nfs_alloc_fattr();
if (res.dir_attr == NULL)
return -ENOMEM;
nfs_fattr_init(fattr);
- status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
nfs_refresh_inode(dir, res.dir_attr);
if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_argp = fhandle;
msg.rpc_resp = fattr;
- status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
}
nfs_free_fattr(res.dir_attr);
dprintk("NFS reply lookup: %d\n", status);
@@ -990,7 +1000,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.nlmclnt_ops = &nlmclnt_fl_close_lock_ops,
.getroot = nfs3_proc_get_root,
.submount = nfs_submount,
- .try_mount = nfs_try_mount,
+ .try_get_tree = nfs_try_get_tree,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 927eb680f161..69971f6c840d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -2334,6 +2334,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
void *data)
{
struct nfs_commitres *result = data;
+ struct nfs_writeverf *verf = result->verf;
enum nfs_stat status;
int error;
@@ -2346,7 +2347,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
result->op_status = status;
if (status != NFS3_OK)
goto out_status;
- error = decode_writeverf3(xdr, &result->verf->verifier);
+ error = decode_writeverf3(xdr, &verf->verifier);
+ if (!error)
+ verf->committed = NFS_FILE_SYNC;
out:
return error;
out_status:
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 1fe83e0f663e..e2ae54b35dfe 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -61,8 +61,11 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
lock, FMODE_WRITE);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
res.falloc_fattr = nfs_alloc_fattr();
if (!res.falloc_fattr)
@@ -287,8 +290,11 @@ static ssize_t _nfs42_proc_copy(struct file *src,
} else {
status = nfs4_set_rw_stateid(&args->src_stateid,
src_lock->open_context, src_lock, FMODE_READ);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
}
status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
pos_src, pos_src + (loff_t)count - 1);
@@ -297,8 +303,11 @@ static ssize_t _nfs42_proc_copy(struct file *src,
status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context,
dst_lock, FMODE_WRITE);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
status = nfs_sync_inode(dst_inode);
if (status)
@@ -334,14 +343,14 @@ static ssize_t _nfs42_proc_copy(struct file *src,
status = handle_async_copy(res, dst_server, src_server, src,
dst, &args->src_stateid, restart);
if (status)
- return status;
+ goto out;
}
if ((!res->synchronous || !args->sync) &&
res->write_res.verifier.committed != NFS_FILE_SYNC) {
status = process_copy_commit(dst, pos_dst, res);
if (status)
- return status;
+ goto out;
}
truncate_pagecache_range(dst_inode, pos_dst,
@@ -546,8 +555,11 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
FMODE_READ);
nfs_put_lock_context(l_ctx);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
status = nfs4_call_sync(src_server->client, src_server, &msg,
&args->cna_seq_args, &res->cnr_seq_res, 0);
@@ -618,8 +630,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep,
status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
lock, FMODE_READ);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
status = nfs_filemap_write_and_wait_range(inode->i_mapping,
offset, LLONG_MAX);
@@ -994,13 +1009,18 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
src_lock, FMODE_READ);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
-
+ }
status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
dst_lock, FMODE_WRITE);
- if (status)
+ if (status) {
+ if (status == -EAGAIN)
+ status = -NFS4ERR_BAD_STATEID;
return status;
+ }
res.dst_fattr = nfs_alloc_fattr();
if (!res.dst_fattr)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a7a73b1d1fec..8be1ba7c62bb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -268,14 +268,13 @@ extern const struct dentry_operations nfs4_dentry_operations;
int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
unsigned, umode_t);
-/* super.c */
+/* fs_context.c */
extern struct file_system_type nfs4_fs_type;
/* nfs4namespace.c */
struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
const struct qstr *);
-struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *,
- struct nfs_fh *, struct nfs_fattr *);
+int nfs4_submount(struct fs_context *, struct nfs_server *);
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
@@ -303,8 +302,10 @@ extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struc
extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
struct page *page, const struct cred *);
extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
-extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, const struct qstr *,
- struct nfs_fh *, struct nfs_fattr *);
+extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *,
+ struct dentry *,
+ struct nfs_fh *,
+ struct nfs_fattr *);
extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
extern const struct xattr_handler *nfs4_xattr_handlers[];
extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
@@ -446,9 +447,7 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *);
extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
extern void nfs4_kill_renewd(struct nfs_client *);
extern void nfs4_renew_state(struct work_struct *);
-extern void nfs4_set_lease_period(struct nfs_client *clp,
- unsigned long lease,
- unsigned long lastrenewed);
+extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease);
/* nfs4state.c */
@@ -526,7 +525,6 @@ extern const nfs4_stateid invalid_stateid;
/* nfs4super.c */
struct nfs_mount_info;
extern struct nfs_subversion nfs_v4;
-struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *);
extern bool nfs4_disable_idmapping;
extern unsigned short max_session_slots;
extern unsigned short max_session_cb_slots;
@@ -536,6 +534,9 @@ extern bool recover_lost_locks;
#define NFS4_CLIENT_ID_UNIQ_LEN (64)
extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
+extern int nfs4_try_get_tree(struct fs_context *);
+extern int nfs4_get_referral_tree(struct fs_context *);
+
/* nfs4sysctl.c */
#ifdef CONFIG_SYSCTL
int nfs4_register_sysctl(void);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 460d6251c405..0cd767e5c977 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1055,66 +1055,64 @@ out:
/*
* Create a version 4 volume record
*/
-static int nfs4_init_server(struct nfs_server *server,
- struct nfs_parsed_mount_data *data)
+static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct rpc_timeout timeparms;
int error;
- nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
- data->timeo, data->retrans);
+ nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol,
+ ctx->timeo, ctx->retrans);
/* Initialise the client representation from the mount data */
- server->flags = data->flags;
- server->options = data->options;
- server->auth_info = data->auth_info;
+ server->flags = ctx->flags;
+ server->options = ctx->options;
+ server->auth_info = ctx->auth_info;
/* Use the first specified auth flavor. If this flavor isn't
* allowed by the server, use the SECINFO path to try the
* other specified flavors */
- if (data->auth_info.flavor_len >= 1)
- data->selected_flavor = data->auth_info.flavors[0];
+ if (ctx->auth_info.flavor_len >= 1)
+ ctx->selected_flavor = ctx->auth_info.flavors[0];
else
- data->selected_flavor = RPC_AUTH_UNIX;
+ ctx->selected_flavor = RPC_AUTH_UNIX;
/* Get a client record */
error = nfs4_set_client(server,
- data->nfs_server.hostname,
- (const struct sockaddr *)&data->nfs_server.address,
- data->nfs_server.addrlen,
- data->client_address,
- data->nfs_server.protocol,
- &timeparms,
- data->minorversion,
- data->nfs_server.nconnect,
- data->net);
+ ctx->nfs_server.hostname,
+ &ctx->nfs_server.address,
+ ctx->nfs_server.addrlen,
+ ctx->client_address,
+ ctx->nfs_server.protocol,
+ &timeparms,
+ ctx->minorversion,
+ ctx->nfs_server.nconnect,
+ fc->net_ns);
if (error < 0)
return error;
- if (data->rsize)
- server->rsize = nfs_block_size(data->rsize, NULL);
- if (data->wsize)
- server->wsize = nfs_block_size(data->wsize, NULL);
+ if (ctx->rsize)
+ server->rsize = nfs_block_size(ctx->rsize, NULL);
+ if (ctx->wsize)
+ server->wsize = nfs_block_size(ctx->wsize, NULL);
- server->acregmin = data->acregmin * HZ;
- server->acregmax = data->acregmax * HZ;
- server->acdirmin = data->acdirmin * HZ;
- server->acdirmax = data->acdirmax * HZ;
- server->port = data->nfs_server.port;
+ server->acregmin = ctx->acregmin * HZ;
+ server->acregmax = ctx->acregmax * HZ;
+ server->acdirmin = ctx->acdirmin * HZ;
+ server->acdirmax = ctx->acdirmax * HZ;
+ server->port = ctx->nfs_server.port;
return nfs_init_server_rpcclient(server, &timeparms,
- data->selected_flavor);
+ ctx->selected_flavor);
}
/*
* Create a version 4 volume record
* - keyed on server and FSID
*/
-/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
- struct nfs_fh *mntfh)*/
-struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+struct nfs_server *nfs4_create_server(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_server *server;
bool auth_probe;
int error;
@@ -1125,14 +1123,14 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
server->cred = get_cred(current_cred());
- auth_probe = mount_info->parsed->auth_info.flavor_len < 1;
+ auth_probe = ctx->auth_info.flavor_len < 1;
/* set up the general RPC client */
- error = nfs4_init_server(server, mount_info->parsed);
+ error = nfs4_init_server(server, fc);
if (error < 0)
goto error;
- error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe);
+ error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe);
if (error < 0)
goto error;
@@ -1146,9 +1144,9 @@ error:
/*
* Create an NFS4 referral server record
*/
-struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
- struct nfs_fh *mntfh)
+struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
bool auth_probe;
@@ -1158,7 +1156,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (!server)
return ERR_PTR(-ENOMEM);
- parent_server = NFS_SB(data->sb);
+ parent_server = NFS_SB(ctx->clone_data.sb);
parent_client = parent_server->nfs_client;
server->cred = get_cred(parent_server->cred);
@@ -1168,10 +1166,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
/* Get a client representation */
#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
- rpc_set_port(data->addr, NFS_RDMA_PORT);
- error = nfs4_set_client(server, data->hostname,
- data->addr,
- data->addrlen,
+ rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
+ error = nfs4_set_client(server,
+ ctx->nfs_server.hostname,
+ &ctx->nfs_server.address,
+ ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_RDMA,
parent_server->client->cl_timeout,
@@ -1182,10 +1181,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
goto init_server;
#endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */
- rpc_set_port(data->addr, NFS_PORT);
- error = nfs4_set_client(server, data->hostname,
- data->addr,
- data->addrlen,
+ rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
+ error = nfs4_set_client(server,
+ ctx->nfs_server.hostname,
+ &ctx->nfs_server.address,
+ ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_TCP,
parent_server->client->cl_timeout,
@@ -1198,13 +1198,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
#if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA)
init_server:
#endif
- error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
+ error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout,
+ ctx->selected_flavor);
if (error < 0)
goto error;
auth_probe = parent_server->auth_info.flavor_len < 1;
- error = nfs4_server_common_setup(server, mntfh, auth_probe);
+ error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe);
if (error < 0)
goto error;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 620de905cba9..be4eb720d5b6 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/falloc.h>
+#include <linux/mount.h>
#include <linux/nfs_fs.h>
#include "delegation.h"
#include "internal.h"
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2e460c33ae48..84026e7b8a5f 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -8,6 +8,7 @@
* NFSv4 namespace
*/
+#include <linux/module.h>
#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/namei.h>
@@ -21,37 +22,64 @@
#include <linux/inet.h>
#include "internal.h"
#include "nfs4_fs.h"
+#include "nfs.h"
#include "dns_resolve.h"
#define NFSDBG_FACILITY NFSDBG_VFS
/*
+ * Work out the length that an NFSv4 path would render to as a standard posix
+ * path, with a leading slash but no terminating slash.
+ */
+static ssize_t nfs4_pathname_len(const struct nfs4_pathname *pathname)
+{
+ ssize_t len = 0;
+ int i;
+
+ for (i = 0; i < pathname->ncomponents; i++) {
+ const struct nfs4_string *component = &pathname->components[i];
+
+ if (component->len > NAME_MAX)
+ goto too_long;
+ len += 1 + component->len; /* Adding "/foo" */
+ if (len > PATH_MAX)
+ goto too_long;
+ }
+ return len;
+
+too_long:
+ return -ENAMETOOLONG;
+}
+
+/*
* Convert the NFSv4 pathname components into a standard posix path.
- *
- * Note that the resulting string will be placed at the end of the buffer
*/
-static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
- char *buffer, ssize_t buflen)
+static char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
+ unsigned short *_len)
{
- char *end = buffer + buflen;
- int n;
+ ssize_t len;
+ char *buf, *p;
+ int i;
+
+ len = nfs4_pathname_len(pathname);
+ if (len < 0)
+ return ERR_PTR(len);
+ *_len = len;
+
+ p = buf = kmalloc(len + 1, GFP_KERNEL);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < pathname->ncomponents; i++) {
+ const struct nfs4_string *component = &pathname->components[i];
- *--end = '\0';
- buflen--;
-
- n = pathname->ncomponents;
- while (--n >= 0) {
- const struct nfs4_string *component = &pathname->components[n];
- buflen -= component->len + 1;
- if (buflen < 0)
- goto Elong;
- end -= component->len;
- memcpy(end, component->data, component->len);
- *--end = '/';
+ *p++ = '/';
+ memcpy(p, component->data, component->len);
+ p += component->len;
}
- return end;
-Elong:
- return ERR_PTR(-ENAMETOOLONG);
+
+ *p = 0;
+ return buf;
}
/*
@@ -100,21 +128,36 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
*/
static int nfs4_validate_fspath(struct dentry *dentry,
const struct nfs4_fs_locations *locations,
- char *page, char *page2)
+ struct nfs_fs_context *ctx)
{
- const char *path, *fs_path;
+ const char *path;
+ char *fs_path;
+ unsigned short len;
+ char *buf;
+ int n;
- path = nfs4_path(dentry, page, PAGE_SIZE);
- if (IS_ERR(path))
+ buf = kmalloc(4096, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ path = nfs4_path(dentry, buf, 4096);
+ if (IS_ERR(path)) {
+ kfree(buf);
return PTR_ERR(path);
+ }
- fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
- if (IS_ERR(fs_path))
+ fs_path = nfs4_pathname_string(&locations->fs_path, &len);
+ if (IS_ERR(fs_path)) {
+ kfree(buf);
return PTR_ERR(fs_path);
+ }
- if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
+ n = strncmp(path, fs_path, len);
+ kfree(buf);
+ kfree(fs_path);
+ if (n != 0) {
dprintk("%s: path %s does not begin with fsroot %s\n",
- __func__, path, fs_path);
+ __func__, path, ctx->nfs_server.export_path);
return -ENOENT;
}
@@ -236,55 +279,77 @@ out:
return new;
}
-static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
- char *page, char *page2,
- const struct nfs4_fs_location *location)
+static int try_location(struct fs_context *fc,
+ const struct nfs4_fs_location *location)
{
- const size_t addr_bufsize = sizeof(struct sockaddr_storage);
- struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client);
- struct vfsmount *mnt = ERR_PTR(-ENOENT);
- char *mnt_path;
- unsigned int maxbuflen;
- unsigned int s;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ unsigned int len, s;
+ char *export_path, *source, *p;
+ int ret = -ENOENT;
+
+ /* Allocate a buffer big enough to hold any of the hostnames plus a
+ * terminating char and also a buffer big enough to hold the hostname
+ * plus a colon plus the path.
+ */
+ len = 0;
+ for (s = 0; s < location->nservers; s++) {
+ const struct nfs4_string *buf = &location->servers[s];
+ if (buf->len > len)
+ len = buf->len;
+ }
- mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
- if (IS_ERR(mnt_path))
- return ERR_CAST(mnt_path);
- mountdata->mnt_path = mnt_path;
- maxbuflen = mnt_path - 1 - page2;
+ kfree(ctx->nfs_server.hostname);
+ ctx->nfs_server.hostname = kmalloc(len + 1, GFP_KERNEL);
+ if (!ctx->nfs_server.hostname)
+ return -ENOMEM;
- mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL);
- if (mountdata->addr == NULL)
- return ERR_PTR(-ENOMEM);
+ export_path = nfs4_pathname_string(&location->rootpath,
+ &ctx->nfs_server.export_path_len);
+ if (IS_ERR(export_path))
+ return PTR_ERR(export_path);
+
+ ctx->nfs_server.export_path = export_path;
+
+ source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1,
+ GFP_KERNEL);
+ if (!source)
+ return -ENOMEM;
+ kfree(fc->source);
+ fc->source = source;
for (s = 0; s < location->nservers; s++) {
const struct nfs4_string *buf = &location->servers[s];
- if (buf->len <= 0 || buf->len >= maxbuflen)
- continue;
-
if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
continue;
- mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len,
- mountdata->addr, addr_bufsize, net);
- if (mountdata->addrlen == 0)
+ ctx->nfs_server.addrlen =
+ nfs_parse_server_name(buf->data, buf->len,
+ &ctx->nfs_server.address,
+ sizeof(ctx->nfs_server._address),
+ fc->net_ns);
+ if (ctx->nfs_server.addrlen == 0)
continue;
- memcpy(page2, buf->data, buf->len);
- page2[buf->len] = '\0';
- mountdata->hostname = page2;
+ rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
- snprintf(page, PAGE_SIZE, "%s:%s",
- mountdata->hostname,
- mountdata->mnt_path);
+ memcpy(ctx->nfs_server.hostname, buf->data, buf->len);
+ ctx->nfs_server.hostname[buf->len] = '\0';
- mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata);
- if (!IS_ERR(mnt))
- break;
+ p = source;
+ memcpy(p, buf->data, buf->len);
+ p += buf->len;
+ *p++ = ':';
+ memcpy(p, ctx->nfs_server.export_path, ctx->nfs_server.export_path_len);
+ p += ctx->nfs_server.export_path_len;
+ *p = 0;
+
+ ret = nfs4_get_referral_tree(fc);
+ if (ret == 0)
+ return 0;
}
- kfree(mountdata->addr);
- return mnt;
+
+ return ret;
}
/**
@@ -293,38 +358,23 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
* @locations: array of NFSv4 server location information
*
*/
-static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
- const struct nfs4_fs_locations *locations)
+static int nfs_follow_referral(struct fs_context *fc,
+ const struct nfs4_fs_locations *locations)
{
- struct vfsmount *mnt = ERR_PTR(-ENOENT);
- struct nfs_clone_mount mountdata = {
- .sb = dentry->d_sb,
- .dentry = dentry,
- .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
- };
- char *page = NULL, *page2 = NULL;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
int loc, error;
if (locations == NULL || locations->nlocations <= 0)
- goto out;
-
- dprintk("%s: referral at %pd2\n", __func__, dentry);
-
- page = (char *) __get_free_page(GFP_USER);
- if (!page)
- goto out;
+ return -ENOENT;
- page2 = (char *) __get_free_page(GFP_USER);
- if (!page2)
- goto out;
+ dprintk("%s: referral at %pd2\n", __func__, ctx->clone_data.dentry);
/* Ensure fs path is a prefix of current dentry path */
- error = nfs4_validate_fspath(dentry, locations, page, page2);
- if (error < 0) {
- mnt = ERR_PTR(error);
- goto out;
- }
+ error = nfs4_validate_fspath(ctx->clone_data.dentry, locations, ctx);
+ if (error < 0)
+ return error;
+ error = -ENOENT;
for (loc = 0; loc < locations->nlocations; loc++) {
const struct nfs4_fs_location *location = &locations->locations[loc];
@@ -332,15 +382,12 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
location->rootpath.ncomponents == 0)
continue;
- mnt = try_location(&mountdata, page, page2, location);
- if (!IS_ERR(mnt))
- break;
+ error = try_location(fc, location);
+ if (error == 0)
+ return 0;
}
-out:
- free_page((unsigned long) page);
- free_page((unsigned long) page2);
- return mnt;
+ return error;
}
/*
@@ -348,71 +395,72 @@ out:
* @dentry - dentry of referral
*
*/
-static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
+static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client)
{
- struct vfsmount *mnt = ERR_PTR(-ENOMEM);
- struct dentry *parent;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct dentry *dentry, *parent;
struct nfs4_fs_locations *fs_locations = NULL;
struct page *page;
- int err;
+ int err = -ENOMEM;
/* BUG_ON(IS_ROOT(dentry)); */
page = alloc_page(GFP_KERNEL);
- if (page == NULL)
- return mnt;
+ if (!page)
+ return -ENOMEM;
fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
- if (fs_locations == NULL)
+ if (!fs_locations)
goto out_free;
/* Get locations */
- mnt = ERR_PTR(-ENOENT);
-
+ dentry = ctx->clone_data.dentry;
parent = dget_parent(dentry);
dprintk("%s: getting locations for %pd2\n",
__func__, dentry);
err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page);
dput(parent);
- if (err != 0 ||
- fs_locations->nlocations <= 0 ||
+ if (err != 0)
+ goto out_free_2;
+
+ err = -ENOENT;
+ if (fs_locations->nlocations <= 0 ||
fs_locations->fs_path.ncomponents <= 0)
- goto out_free;
+ goto out_free_2;
- mnt = nfs_follow_referral(dentry, fs_locations);
+ err = nfs_follow_referral(fc, fs_locations);
+out_free_2:
+ kfree(fs_locations);
out_free:
__free_page(page);
- kfree(fs_locations);
- return mnt;
+ return err;
}
-struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
- struct nfs_fh *fh, struct nfs_fattr *fattr)
+int nfs4_submount(struct fs_context *fc, struct nfs_server *server)
{
- rpc_authflavor_t flavor = server->client->cl_auth->au_flavor;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct dentry *dentry = ctx->clone_data.dentry;
struct dentry *parent = dget_parent(dentry);
struct inode *dir = d_inode(parent);
- const struct qstr *name = &dentry->d_name;
struct rpc_clnt *client;
- struct vfsmount *mnt;
+ int ret;
/* Look it up again to get its attributes and sec flavor */
- client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
+ client = nfs4_proc_lookup_mountpoint(dir, dentry, ctx->mntfh,
+ ctx->clone_data.fattr);
dput(parent);
if (IS_ERR(client))
- return ERR_CAST(client);
+ return PTR_ERR(client);
- if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
- mnt = nfs_do_refmount(client, dentry);
- goto out;
+ ctx->selected_flavor = client->cl_auth->au_flavor;
+ if (ctx->clone_data.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+ ret = nfs_do_refmount(fc, client);
+ } else {
+ ret = nfs_do_submount(fc);
}
- if (client->cl_auth->au_flavor != flavor)
- flavor = client->cl_auth->au_flavor;
- mnt = nfs_do_submount(dentry, fh, fattr, flavor);
-out:
rpc_shutdown_client(client);
- return mnt;
+ return ret;
}
/*
@@ -453,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
rpc_set_port(sap, NFS_PORT);
error = -ENOMEM;
- hostname = kstrndup(buf->data, buf->len, GFP_KERNEL);
+ hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL);
if (hostname == NULL)
break;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 76d37161409a..95d07a3dc5d1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1097,11 +1097,12 @@ static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup)
return ret;
}
-static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
- struct nfs_server *server,
- struct rpc_message *msg,
- struct nfs4_sequence_args *args,
- struct nfs4_sequence_res *res)
+static int nfs4_do_call_sync(struct rpc_clnt *clnt,
+ struct nfs_server *server,
+ struct rpc_message *msg,
+ struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res,
+ unsigned short task_flags)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_call_sync_data data = {
@@ -1113,12 +1114,23 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
.rpc_client = clnt,
.rpc_message = msg,
.callback_ops = clp->cl_mvops->call_sync_ops,
- .callback_data = &data
+ .callback_data = &data,
+ .flags = task_flags,
};
return nfs4_call_sync_custom(&task_setup);
}
+static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
+ struct nfs_server *server,
+ struct rpc_message *msg,
+ struct nfs4_sequence_args *args,
+ struct nfs4_sequence_res *res)
+{
+ return nfs4_do_call_sync(clnt, server, msg, args, res, 0);
+}
+
+
int nfs4_call_sync(struct rpc_clnt *clnt,
struct nfs_server *server,
struct rpc_message *msg,
@@ -3187,6 +3199,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
exception.retry = 1;
continue;
}
+ if (status == -NFS4ERR_EXPIRED) {
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ exception.retry = 1;
+ continue;
+ }
if (status == -EAGAIN) {
/* We must have found a delegation */
exception.retry = 1;
@@ -3239,6 +3256,8 @@ static int _nfs4_do_setattr(struct inode *inode,
nfs_put_lock_context(l_ctx);
if (status == -EIO)
return -EBADF;
+ else if (status == -EAGAIN)
+ goto zero_stateid;
} else {
zero_stateid:
nfs4_stateid_copy(&arg->stateid, &zero_stateid);
@@ -4064,11 +4083,18 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_argp = &args,
.rpc_resp = &res,
};
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+ task_flags |= RPC_TASK_TIMEOUT;
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
nfs_fattr_init(fattr);
- return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+ return nfs4_do_call_sync(server->client, server, &msg,
+ &args.seq_args, &res.seq_res, task_flags);
}
int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -4156,7 +4182,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
- const struct qstr *name, struct nfs_fh *fhandle,
+ struct dentry *dentry, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs_server *server = NFS_SERVER(dir);
@@ -4164,7 +4190,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
struct nfs4_lookup_arg args = {
.bitmask = server->attr_bitmask,
.dir_fh = NFS_FH(dir),
- .name = name,
+ .name = &dentry->d_name,
};
struct nfs4_lookup_res res = {
.server = server,
@@ -4177,13 +4203,20 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
.rpc_argp = &args,
.rpc_resp = &res,
};
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (nfs_lookup_is_soft_revalidate(dentry))
+ task_flags |= RPC_TASK_TIMEOUT;
args.bitmask = nfs4_bitmask(server, label);
nfs_fattr_init(fattr);
- dprintk("NFS call lookup %s\n", name->name);
- status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
+ dprintk("NFS call lookup %pd2\n", dentry);
+ nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
+ status = nfs4_do_call_sync(clnt, server, &msg,
+ &args.seq_args, &res.seq_res, task_flags);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -4197,16 +4230,17 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
}
static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
- const struct qstr *name, struct nfs_fh *fhandle,
+ struct dentry *dentry, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct nfs4_label *label)
{
struct nfs4_exception exception = {
.interruptible = true,
};
struct rpc_clnt *client = *clnt;
+ const struct qstr *name = &dentry->d_name;
int err;
do {
- err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label);
+ err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label);
trace_nfs4_lookup(dir, name, err);
switch (err) {
case -NFS4ERR_BADNAME:
@@ -4241,14 +4275,14 @@ out:
return err;
}
-static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name,
+static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
int status;
struct rpc_clnt *client = NFS_CLIENT(dir);
- status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label);
if (client != NFS_CLIENT(dir)) {
rpc_shutdown_client(client);
nfs_fixup_secinfo_attributes(fattr);
@@ -4257,13 +4291,13 @@ static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name,
}
struct rpc_clnt *
-nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name,
+nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr)
{
struct rpc_clnt *client = NFS_CLIENT(dir);
int status;
- status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL);
+ status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL);
if (status < 0)
return ERR_PTR(status);
return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
@@ -5019,16 +5053,13 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
struct nfs4_exception exception = {
.interruptible = true,
};
- unsigned long now = jiffies;
int err;
do {
err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
if (err == 0) {
- nfs4_set_lease_period(server->nfs_client,
- fsinfo->lease_time * HZ,
- now);
+ nfs4_set_lease_period(server->nfs_client, fsinfo->lease_time * HZ);
break;
}
err = nfs4_handle_exception(server, err, &exception);
@@ -5582,10 +5613,9 @@ out:
*/
static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
- struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, };
+ struct page **pages;
struct nfs_getaclargs args = {
.fh = NFS_FH(inode),
- .acl_pages = pages,
.acl_len = buflen,
};
struct nfs_getaclres res = {
@@ -5596,11 +5626,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
.rpc_argp = &args,
.rpc_resp = &res,
};
- unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
+ unsigned int npages;
int ret = -ENOMEM, i;
+ struct nfs_server *server = NFS_SERVER(inode);
- if (npages > ARRAY_SIZE(pages))
- return -ERANGE;
+ if (buflen == 0)
+ buflen = server->rsize;
+
+ npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
+ pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS);
+ if (!pages)
+ return -ENOMEM;
+
+ args.acl_pages = pages;
for (i = 0; i < npages; i++) {
pages[i] = alloc_page(GFP_KERNEL);
@@ -5646,6 +5684,7 @@ out_free:
__free_page(pages[i]);
if (res.acl_scratch)
__free_page(res.acl_scratch);
+ kfree(pages);
return ret;
}
@@ -6084,6 +6123,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
.callback_data = &setclientid,
.flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN,
};
+ unsigned long now = jiffies;
int status;
/* nfs_client_id4 */
@@ -6116,6 +6156,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
put_rpccred(setclientid.sc_cred);
}
+
+ if (status == 0)
+ do_renew_lease(clp, now);
out:
trace_nfs4_setclientid(clp, status);
dprintk("NFS reply setclientid: %d\n", status);
@@ -6859,7 +6902,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_
case -NFS4ERR_STALE_STATEID:
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
nfs4_schedule_lease_recovery(server->nfs_client);
- };
+ }
}
static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
@@ -8203,6 +8246,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
struct rpc_task *task;
struct nfs41_exchange_id_args *argp;
struct nfs41_exchange_id_res *resp;
+ unsigned long now = jiffies;
int status;
task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL);
@@ -8223,6 +8267,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre
if (status != 0)
goto out;
+ do_renew_lease(clp, now);
+
clp->cl_clientid = resp->clientid;
clp->cl_exchange_flags = resp->flags;
clp->cl_seqid = resp->seqid;
@@ -8626,7 +8672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
case -EACCES:
case -EAGAIN:
goto out;
- };
+ }
clp->cl_seqid++;
if (!status) {
@@ -10001,7 +10047,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.file_ops = &nfs4_file_operations,
.getroot = nfs4_proc_get_root,
.submount = nfs4_submount,
- .try_mount = nfs4_try_mount,
+ .try_get_tree = nfs4_try_get_tree,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
.lookup = nfs4_proc_lookup,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 6ea431b067dd..ff876dda7f06 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -138,15 +138,12 @@ nfs4_kill_renewd(struct nfs_client *clp)
*
* @clp: pointer to nfs_client
* @lease: new value for lease period
- * @lastrenewed: time at which lease was last renewed
*/
void nfs4_set_lease_period(struct nfs_client *clp,
- unsigned long lease,
- unsigned long lastrenewed)
+ unsigned long lease)
{
spin_lock(&clp->cl_lock);
clp->cl_lease_time = lease;
- clp->cl_last_renewal = lastrenewed;
spin_unlock(&clp->cl_lock);
/* Cap maximum reconnect timeout at 1/2 lease period */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 34552329233d..f7723d221945 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -92,17 +92,15 @@ static int nfs4_setup_state_renewal(struct nfs_client *clp)
{
int status;
struct nfs_fsinfo fsinfo;
- unsigned long now;
if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
nfs4_schedule_state_renewal(clp);
return 0;
}
- now = jiffies;
status = nfs4_proc_get_lease_time(clp, &fsinfo);
if (status == 0) {
- nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
+ nfs4_set_lease_period(clp, fsinfo.lease_time * HZ);
nfs4_schedule_state_renewal(clp);
}
@@ -766,6 +764,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
list_del(&state->open_states);
spin_unlock(&inode->i_lock);
spin_unlock(&owner->so_lock);
+ nfs4_inode_return_delegation_on_close(inode);
iput(inode);
nfs4_free_open_state(state);
nfs4_put_state_owner(owner);
@@ -1135,7 +1134,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
case -NFS4ERR_MOVED:
/* Non-seqid mutating errors */
return;
- };
+ }
/*
* Note: no locking needed as we are guaranteed to be first
* on the sequence list
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 2c9cbade561a..1475f932d7da 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -4,6 +4,7 @@
*/
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/mount.h>
#include <linux/nfs4_mount.h>
#include <linux/nfs_fs.h>
#include "delegation.h"
@@ -18,36 +19,6 @@
static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc);
static void nfs4_evict_inode(struct inode *inode);
-static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-
-static struct file_system_type nfs4_remote_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_remote_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-
-static struct file_system_type nfs4_remote_referral_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_remote_referral_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-
-struct file_system_type nfs4_referral_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_referral_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
static const struct super_operations nfs4_sops = {
.alloc_inode = nfs_alloc_inode,
@@ -61,16 +32,15 @@ static const struct super_operations nfs4_sops = {
.show_devname = nfs_show_devname,
.show_path = nfs_show_path,
.show_stats = nfs_show_stats,
- .remount_fs = nfs_remount,
};
struct nfs_subversion nfs_v4 = {
- .owner = THIS_MODULE,
- .nfs_fs = &nfs4_fs_type,
- .rpc_vers = &nfs_version4,
- .rpc_ops = &nfs_v4_clientops,
- .sops = &nfs4_sops,
- .xattr = nfs4_xattr_handlers,
+ .owner = THIS_MODULE,
+ .nfs_fs = &nfs4_fs_type,
+ .rpc_vers = &nfs_version4,
+ .rpc_ops = &nfs_v4_clientops,
+ .sops = &nfs4_sops,
+ .xattr = nfs4_xattr_handlers,
};
static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -101,53 +71,6 @@ static void nfs4_evict_inode(struct inode *inode)
nfs_clear_inode(inode);
}
-/*
- * Get the superblock for the NFS4 root partition
- */
-static struct dentry *
-nfs4_remote_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *info)
-{
- struct nfs_mount_info *mount_info = info;
- struct nfs_server *server;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
-
- mount_info->set_security = nfs_set_sb_security;
-
- /* Get a volume representation */
- server = nfs4_create_server(mount_info, &nfs_v4);
- if (IS_ERR(server)) {
- mntroot = ERR_CAST(server);
- goto out;
- }
-
- mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4);
-
-out:
- return mntroot;
-}
-
-static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
- int flags, void *data, const char *hostname)
-{
- struct vfsmount *root_mnt;
- char *root_devname;
- size_t len;
-
- len = strlen(hostname) + 5;
- root_devname = kmalloc(len, GFP_KERNEL);
- if (root_devname == NULL)
- return ERR_PTR(-ENOMEM);
- /* Does hostname needs to be enclosed in brackets? */
- if (strchr(hostname, ':'))
- snprintf(root_devname, len, "[%s]:/", hostname);
- else
- snprintf(root_devname, len, "%s:/", hostname);
- root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
- kfree(root_devname);
- return root_mnt;
-}
-
struct nfs_referral_count {
struct list_head list;
const struct task_struct *task;
@@ -214,111 +137,125 @@ static void nfs_referral_loop_unprotect(void)
kfree(p);
}
-static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
- const char *export_path)
+static int do_nfs4_mount(struct nfs_server *server,
+ struct fs_context *fc,
+ const char *hostname,
+ const char *export_path)
{
+ struct nfs_fs_context *root_ctx;
+ struct fs_context *root_fc;
+ struct vfsmount *root_mnt;
struct dentry *dentry;
- int err;
+ size_t len;
+ int ret;
- if (IS_ERR(root_mnt))
- return ERR_CAST(root_mnt);
+ struct fs_parameter param = {
+ .key = "source",
+ .type = fs_value_is_string,
+ .dirfd = -1,
+ };
- err = nfs_referral_loop_protect();
- if (err) {
- mntput(root_mnt);
- return ERR_PTR(err);
+ if (IS_ERR(server))
+ return PTR_ERR(server);
+
+ root_fc = vfs_dup_fs_context(fc);
+ if (IS_ERR(root_fc)) {
+ nfs_free_server(server);
+ return PTR_ERR(root_fc);
}
+ kfree(root_fc->source);
+ root_fc->source = NULL;
- dentry = mount_subtree(root_mnt, export_path);
- nfs_referral_loop_unprotect();
+ root_ctx = nfs_fc2context(root_fc);
+ root_ctx->internal = true;
+ root_ctx->server = server;
+ /* We leave export_path unset as it's not used to find the root. */
- return dentry;
-}
+ len = strlen(hostname) + 5;
+ param.string = kmalloc(len, GFP_KERNEL);
+ if (param.string == NULL) {
+ put_fs_context(root_fc);
+ return -ENOMEM;
+ }
-struct dentry *nfs4_try_mount(int flags, const char *dev_name,
- struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
-{
- char *export_path;
- struct vfsmount *root_mnt;
- struct dentry *res;
- struct nfs_parsed_mount_data *data = mount_info->parsed;
+ /* Does hostname needs to be enclosed in brackets? */
+ if (strchr(hostname, ':'))
+ param.size = snprintf(param.string, len, "[%s]:/", hostname);
+ else
+ param.size = snprintf(param.string, len, "%s:/", hostname);
+ ret = vfs_parse_fs_param(root_fc, &param);
+ kfree(param.string);
+ if (ret < 0) {
+ put_fs_context(root_fc);
+ return ret;
+ }
+ root_mnt = fc_mount(root_fc);
+ put_fs_context(root_fc);
+
+ if (IS_ERR(root_mnt))
+ return PTR_ERR(root_mnt);
- dfprintk(MOUNT, "--> nfs4_try_mount()\n");
+ ret = nfs_referral_loop_protect();
+ if (ret) {
+ mntput(root_mnt);
+ return ret;
+ }
- export_path = data->nfs_server.export_path;
- data->nfs_server.export_path = "/";
- root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
- data->nfs_server.hostname);
- data->nfs_server.export_path = export_path;
+ dentry = mount_subtree(root_mnt, export_path);
+ nfs_referral_loop_unprotect();
- res = nfs_follow_remote_path(root_mnt, export_path);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
- dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n",
- PTR_ERR_OR_ZERO(res),
- IS_ERR(res) ? " [error]" : "");
- return res;
+ fc->root = dentry;
+ return 0;
}
-static struct dentry *
-nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
+int nfs4_try_get_tree(struct fs_context *fc)
{
- struct nfs_mount_info mount_info = {
- .fill_super = nfs_fill_super,
- .set_security = nfs_clone_sb_security,
- .cloned = raw_data,
- };
- struct nfs_server *server;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
-
- dprintk("--> nfs4_referral_get_sb()\n");
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ int err;
- mount_info.mntfh = nfs_alloc_fhandle();
- if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
- goto out;
+ dfprintk(MOUNT, "--> nfs4_try_get_tree()\n");
- /* create a new volume representation */
- server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
- if (IS_ERR(server)) {
- mntroot = ERR_CAST(server);
- goto out;
+ /* We create a mount for the server's root, walk to the requested
+ * location and then create another mount for that.
+ */
+ err= do_nfs4_mount(nfs4_create_server(fc),
+ fc, ctx->nfs_server.hostname,
+ ctx->nfs_server.export_path);
+ if (err) {
+ nfs_errorf(fc, "NFS4: Couldn't follow remote path");
+ dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err);
+ } else {
+ dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n");
}
-
- mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4);
-out:
- nfs_free_fhandle(mount_info.mntfh);
- return mntroot;
+ return err;
}
/*
* Create an NFS4 server record on referral traversal
*/
-static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data)
+int nfs4_get_referral_tree(struct fs_context *fc)
{
- struct nfs_clone_mount *data = raw_data;
- char *export_path;
- struct vfsmount *root_mnt;
- struct dentry *res;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ int err;
dprintk("--> nfs4_referral_mount()\n");
- export_path = data->mnt_path;
- data->mnt_path = "/";
-
- root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
- flags, data, data->hostname);
- data->mnt_path = export_path;
-
- res = nfs_follow_remote_path(root_mnt, export_path);
- dprintk("<-- nfs4_referral_mount() = %d%s\n",
- PTR_ERR_OR_ZERO(res),
- IS_ERR(res) ? " [error]" : "");
- return res;
+ /* create a new volume representation */
+ err = do_nfs4_mount(nfs4_create_referral_server(fc),
+ fc, ctx->nfs_server.hostname,
+ ctx->nfs_server.export_path);
+ if (err) {
+ nfs_errorf(fc, "NFS4: Couldn't follow remote path");
+ dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err);
+ } else {
+ dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n");
+ }
+ return err;
}
-
static int __init init_nfs_v4(void)
{
int err;
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
index 1a8f376b3f73..d9ac556bebcf 100644
--- a/fs/nfs/nfs4trace.c
+++ b/fs/nfs/nfs4trace.c
@@ -24,4 +24,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist);
EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error);
+EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error);
#endif
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index e60b6fbd5ada..1e97e5e04cb4 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -155,6 +155,9 @@ TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
+
#define show_nfsv4_errors(error) \
__print_symbolic(error, \
{ NFS4_OK, "OK" }, \
@@ -305,7 +308,10 @@ TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
{ NFS4ERR_WRONGSEC, "WRONGSEC" }, \
{ NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
{ NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
- { NFS4ERR_XDEV, "XDEV" })
+ { NFS4ERR_XDEV, "XDEV" }, \
+ /* ***** Internal to Linux NFS client ***** */ \
+ { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
+ { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
#define show_open_flags(flags) \
__print_flags(flags, "|", \
@@ -352,7 +358,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
),
TP_fast_assign(
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__assign_str(dstaddr, clp->cl_hostname);
),
@@ -432,7 +438,8 @@ TRACE_EVENT(nfs4_sequence_done,
__entry->target_highest_slotid =
res->sr_target_highest_slotid;
__entry->status_flags = res->sr_status_flags;
- __entry->error = res->sr_status;
+ __entry->error = res->sr_status < 0 ?
+ -res->sr_status : 0;
),
TP_printk(
"error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
@@ -640,7 +647,7 @@ TRACE_EVENT(nfs4_state_mgr_failed,
),
TP_fast_assign(
- __entry->error = status;
+ __entry->error = status < 0 ? -status : 0;
__entry->state = clp->cl_state;
__assign_str(hostname, clp->cl_hostname);
__assign_str(section, section);
@@ -659,7 +666,7 @@ TRACE_EVENT(nfs4_xdr_status,
TP_PROTO(
const struct xdr_stream *xdr,
u32 op,
- int error
+ u32 error
),
TP_ARGS(xdr, op, error),
@@ -691,6 +698,41 @@ TRACE_EVENT(nfs4_xdr_status,
)
);
+DECLARE_EVENT_CLASS(nfs4_cb_error_class,
+ TP_PROTO(
+ __be32 xid,
+ u32 cb_ident
+ ),
+
+ TP_ARGS(xid, cb_ident),
+
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, cbident)
+ ),
+
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(xid);
+ __entry->cbident = cb_ident;
+ ),
+
+ TP_printk(
+ "xid=0x%08x cb_ident=0x%08x",
+ __entry->xid, __entry->cbident
+ )
+);
+
+#define DEFINE_CB_ERROR_EVENT(name) \
+ DEFINE_EVENT(nfs4_cb_error_class, nfs_cb_##name, \
+ TP_PROTO( \
+ __be32 xid, \
+ u32 cb_ident \
+ ), \
+ TP_ARGS(xid, cb_ident))
+
+DEFINE_CB_ERROR_EVENT(no_clp);
+DEFINE_CB_ERROR_EVENT(badprinc);
+
DECLARE_EVENT_CLASS(nfs4_open_event,
TP_PROTO(
const struct nfs_open_context *ctx,
@@ -849,7 +891,7 @@ TRACE_EVENT(nfs4_close,
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
__entry->fmode = (__force unsigned int)state->state;
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->stateid_seq =
be32_to_cpu(args->stateid.seqid);
__entry->stateid_hash =
@@ -914,7 +956,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
TP_fast_assign(
const struct inode *inode = state->inode;
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->cmd = cmd;
__entry->type = request->fl_type;
__entry->start = request->fl_start;
@@ -986,7 +1028,7 @@ TRACE_EVENT(nfs4_set_lock,
TP_fast_assign(
const struct inode *inode = state->inode;
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->cmd = cmd;
__entry->type = request->fl_type;
__entry->start = request->fl_start;
@@ -1164,7 +1206,7 @@ TRACE_EVENT(nfs4_delegreturn_exit,
TP_fast_assign(
__entry->dev = res->server->s_dev;
__entry->fhandle = nfs_fhandle_hash(args->fhandle);
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->stateid_seq =
be32_to_cpu(args->stateid->seqid);
__entry->stateid_hash =
@@ -1204,7 +1246,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
TP_fast_assign(
const struct inode *inode = state->inode;
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
@@ -1306,7 +1348,7 @@ TRACE_EVENT(nfs4_lookupp,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = NFS_FILEID(inode);
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
),
TP_printk(
@@ -1342,7 +1384,7 @@ TRACE_EVENT(nfs4_rename,
__entry->dev = olddir->i_sb->s_dev;
__entry->olddir = NFS_FILEID(olddir);
__entry->newdir = NFS_FILEID(newdir);
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__assign_str(oldname, oldname->name);
__assign_str(newname, newname->name);
),
@@ -1433,7 +1475,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->stateid_seq =
be32_to_cpu(stateid->seqid);
__entry->stateid_hash =
@@ -1489,7 +1531,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event,
__entry->valid = fattr->valid;
__entry->fhandle = nfs_fhandle_hash(fhandle);
__entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0;
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
),
TP_printk(
@@ -1536,7 +1578,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
),
TP_fast_assign(
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->fhandle = nfs_fhandle_hash(fhandle);
if (!IS_ERR_OR_NULL(inode)) {
__entry->fileid = NFS_FILEID(inode);
@@ -1593,7 +1635,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
),
TP_fast_assign(
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->fhandle = nfs_fhandle_hash(fhandle);
if (!IS_ERR_OR_NULL(inode)) {
__entry->fileid = NFS_FILEID(inode);
@@ -1694,7 +1736,8 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
__field(u32, fhandle)
__field(u64, fileid)
__field(loff_t, offset)
- __field(size_t, count)
+ __field(u32, arg_count)
+ __field(u32, res_count)
__field(unsigned long, error)
__field(int, stateid_seq)
__field(u32, stateid_hash)
@@ -1702,13 +1745,18 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
TP_fast_assign(
const struct inode *inode = hdr->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
const struct nfs4_state *state =
hdr->args.context->state;
+
__entry->dev = inode->i_sb->s_dev;
- __entry->fileid = NFS_FILEID(inode);
- __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
__entry->offset = hdr->args.offset;
- __entry->count = hdr->args.count;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
__entry->error = error < 0 ? -error : 0;
__entry->stateid_seq =
be32_to_cpu(state->stateid.seqid);
@@ -1718,14 +1766,14 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zu stateid=%d:0x%08x",
+ "offset=%lld count=%u res=%u stateid=%d:0x%08x",
-__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset,
- __entry->count,
+ __entry->arg_count, __entry->res_count,
__entry->stateid_seq, __entry->stateid_hash
)
);
@@ -1754,7 +1802,8 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
__field(u32, fhandle)
__field(u64, fileid)
__field(loff_t, offset)
- __field(size_t, count)
+ __field(u32, arg_count)
+ __field(u32, res_count)
__field(unsigned long, error)
__field(int, stateid_seq)
__field(u32, stateid_hash)
@@ -1762,13 +1811,18 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
TP_fast_assign(
const struct inode *inode = hdr->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
const struct nfs4_state *state =
hdr->args.context->state;
+
__entry->dev = inode->i_sb->s_dev;
- __entry->fileid = NFS_FILEID(inode);
- __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
__entry->offset = hdr->args.offset;
- __entry->count = hdr->args.count;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
__entry->error = error < 0 ? -error : 0;
__entry->stateid_seq =
be32_to_cpu(state->stateid.seqid);
@@ -1778,14 +1832,14 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zu stateid=%d:0x%08x",
+ "offset=%lld count=%u res=%u stateid=%d:0x%08x",
-__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset,
- __entry->count,
+ __entry->arg_count, __entry->res_count,
__entry->stateid_seq, __entry->stateid_hash
)
);
@@ -1814,24 +1868,28 @@ DECLARE_EVENT_CLASS(nfs4_commit_event,
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
- __field(loff_t, offset)
- __field(size_t, count)
__field(unsigned long, error)
+ __field(loff_t, offset)
+ __field(u32, count)
),
TP_fast_assign(
const struct inode *inode = data->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = data->args.fh ?
+ data->args.fh : &nfsi->fh;
+
__entry->dev = inode->i_sb->s_dev;
- __entry->fileid = NFS_FILEID(inode);
- __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
__entry->offset = data->args.offset;
__entry->count = data->args.count;
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
),
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%zu",
+ "offset=%lld count=%u",
-__entry->error,
show_nfsv4_errors(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -1896,7 +1954,7 @@ TRACE_EVENT(nfs4_layoutget,
__entry->iomode = args->iomode;
__entry->offset = args->offset;
__entry->count = args->length;
- __entry->error = error;
+ __entry->error = error < 0 ? -error : 0;
__entry->stateid_seq =
be32_to_cpu(state->stateid.seqid);
__entry->stateid_hash =
@@ -2094,6 +2152,115 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done);
DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist);
DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist);
+DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
+ TP_PROTO(
+ const struct nfs_pgio_header *hdr
+ ),
+
+ TP_ARGS(hdr),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ __string(dstaddr, hdr->ds_clp ?
+ rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown")
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = hdr->inode;
+
+ __entry->error = hdr->res.op_status;
+ __entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
+ __entry->stateid_seq =
+ be32_to_cpu(hdr->args.stateid.seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(&hdr->args.stateid);
+ __assign_str(dstaddr, hdr->ds_clp ?
+ rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown");
+ ),
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
+ -__entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->offset, __entry->count,
+ __entry->stateid_seq, __entry->stateid_hash,
+ __get_str(dstaddr)
+ )
+);
+
+#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \
+ DEFINE_EVENT(nfs4_flexfiles_io_event, name, \
+ TP_PROTO( \
+ const struct nfs_pgio_header *hdr \
+ ), \
+ TP_ARGS(hdr))
+DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error);
+DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error);
+
+TRACE_EVENT(ff_layout_commit_error,
+ TP_PROTO(
+ const struct nfs_commit_data *data
+ ),
+
+ TP_ARGS(data),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
+ __string(dstaddr, data->ds_clp ?
+ rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown")
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = data->inode;
+
+ __entry->error = data->res.op_status;
+ __entry->fhandle = nfs_fhandle_hash(data->args.fh);
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->offset = data->args.offset;
+ __entry->count = data->args.count;
+ __assign_str(dstaddr, data->ds_clp ?
+ rpc_peeraddr2str(data->ds_clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown");
+ ),
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%llu count=%u dstaddr=%s",
+ -__entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->offset, __entry->count,
+ __get_str(dstaddr)
+ )
+);
+
+
#endif /* CONFIG_NFS_V4_1 */
#endif /* _TRACE_NFS4_H */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 728d88b6a698..47817ef0aadb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1061,7 +1061,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
static __be32 *
xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t)
{
- p = xdr_encode_hyper(p, (__s64)t->tv_sec);
+ p = xdr_encode_hyper(p, t->tv_sec);
*p++ = cpu_to_be32(t->tv_nsec);
return p;
}
@@ -4313,11 +4313,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi
static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
{
+ struct nfs_writeverf *verf = res->verf;
int status;
status = decode_op_hdr(xdr, OP_COMMIT);
if (!status)
- status = decode_write_verifier(xdr, &res->verf->verifier);
+ status = decode_write_verifier(xdr, &verf->verifier);
+ if (!status)
+ verf->committed = NFS_FILE_SYNC;
return status;
}
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 2a82dcce5fc1..a9588d19a5ae 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -198,7 +198,66 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit);
DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
DEFINE_NFS_INODE_EVENT(nfs_access_enter);
-DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit);
+
+TRACE_EVENT(nfs_access_exit,
+ TP_PROTO(
+ const struct inode *inode,
+ unsigned int mask,
+ unsigned int permitted,
+ int error
+ ),
+
+ TP_ARGS(inode, mask, permitted, error),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(unsigned char, type)
+ __field(u64, fileid)
+ __field(u64, version)
+ __field(loff_t, size)
+ __field(unsigned long, nfsi_flags)
+ __field(unsigned long, cache_validity)
+ __field(unsigned int, mask)
+ __field(unsigned int, permitted)
+ ),
+
+ TP_fast_assign(
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ __entry->error = error < 0 ? -error : 0;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->type = nfs_umode_to_dtype(inode->i_mode);
+ __entry->version = inode_peek_iversion_raw(inode);
+ __entry->size = i_size_read(inode);
+ __entry->nfsi_flags = nfsi->flags;
+ __entry->cache_validity = nfsi->cache_validity;
+ __entry->mask = mask;
+ __entry->permitted = permitted;
+ ),
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "type=%u (%s) version=%llu size=%lld "
+ "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) "
+ "mask=0x%x permitted=0x%x",
+ -__entry->error, nfs_show_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->type,
+ nfs_show_file_type(__entry->type),
+ (unsigned long long)__entry->version,
+ (long long)__entry->size,
+ __entry->cache_validity,
+ nfs_show_cache_validity(__entry->cache_validity),
+ __entry->nfsi_flags,
+ nfs_show_nfsi_flags(__entry->nfsi_flags),
+ __entry->mask, __entry->permitted
+ )
+);
TRACE_DEFINE_ENUM(LOOKUP_FOLLOW);
TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY);
@@ -818,75 +877,85 @@ TRACE_EVENT(nfs_sillyrename_unlink,
TRACE_EVENT(nfs_initiate_read,
TP_PROTO(
- const struct inode *inode,
- loff_t offset, unsigned long count
+ const struct nfs_pgio_header *hdr
),
- TP_ARGS(inode, offset, count),
+ TP_ARGS(hdr),
TP_STRUCT__entry(
- __field(loff_t, offset)
- __field(unsigned long, count)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
),
TP_fast_assign(
+ const struct inode *inode = hdr->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
- __entry->offset = offset;
- __entry->count = count;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%lu",
+ "offset=%lld count=%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->count
+ (long long)__entry->offset, __entry->count
)
);
TRACE_EVENT(nfs_readpage_done,
TP_PROTO(
- const struct inode *inode,
- int status, loff_t offset, bool eof
+ const struct rpc_task *task,
+ const struct nfs_pgio_header *hdr
),
- TP_ARGS(inode, status, offset, eof),
+ TP_ARGS(task, hdr),
TP_STRUCT__entry(
- __field(int, status)
- __field(loff_t, offset)
- __field(bool, eof)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, arg_count)
+ __field(u32, res_count)
+ __field(bool, eof)
+ __field(int, status)
),
TP_fast_assign(
+ const struct inode *inode = hdr->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
-
- __entry->status = status;
- __entry->offset = offset;
- __entry->eof = eof;
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
+
+ __entry->status = task->tk_status;
+ __entry->offset = hdr->args.offset;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
+ __entry->eof = hdr->res.eof;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld status=%d%s",
+ "offset=%lld count=%u res=%u status=%d%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->status,
+ (long long)__entry->offset, __entry->arg_count,
+ __entry->res_count, __entry->status,
__entry->eof ? " eof" : ""
)
);
@@ -903,90 +972,144 @@ TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
TRACE_EVENT(nfs_initiate_write,
TP_PROTO(
- const struct inode *inode,
- loff_t offset, unsigned long count,
- enum nfs3_stable_how stable
+ const struct nfs_pgio_header *hdr
),
- TP_ARGS(inode, offset, count, stable),
+ TP_ARGS(hdr),
TP_STRUCT__entry(
- __field(loff_t, offset)
- __field(unsigned long, count)
- __field(enum nfs3_stable_how, stable)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
+ __field(enum nfs3_stable_how, stable)
),
TP_fast_assign(
+ const struct inode *inode = hdr->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
- __entry->offset = offset;
- __entry->count = count;
- __entry->stable = stable;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
+ __entry->stable = hdr->args.stable;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%lu stable=%s",
+ "offset=%lld count=%u stable=%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->count,
+ (long long)__entry->offset, __entry->count,
nfs_show_stable(__entry->stable)
)
);
TRACE_EVENT(nfs_writeback_done,
TP_PROTO(
- const struct inode *inode,
- int status,
- loff_t offset,
- struct nfs_writeverf *writeverf
+ const struct rpc_task *task,
+ const struct nfs_pgio_header *hdr
),
- TP_ARGS(inode, status, offset, writeverf),
+ TP_ARGS(task, hdr),
TP_STRUCT__entry(
- __field(int, status)
- __field(loff_t, offset)
- __field(enum nfs3_stable_how, stable)
- __field(unsigned long long, verifier)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, arg_count)
+ __field(u32, res_count)
+ __field(int, status)
+ __field(enum nfs3_stable_how, stable)
+ __array(char, verifier, NFS4_VERIFIER_SIZE)
),
TP_fast_assign(
+ const struct inode *inode = hdr->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
-
- __entry->status = status;
- __entry->offset = offset;
- __entry->stable = writeverf->committed;
- memcpy(&__entry->verifier, &writeverf->verifier,
- sizeof(__entry->verifier));
+ const struct nfs_fh *fh = hdr->args.fh ?
+ hdr->args.fh : &nfsi->fh;
+ const struct nfs_writeverf *verf = hdr->res.verf;
+
+ __entry->status = task->tk_status;
+ __entry->offset = hdr->args.offset;
+ __entry->arg_count = hdr->args.count;
+ __entry->res_count = hdr->res.count;
+ __entry->stable = verf->committed;
+ memcpy(__entry->verifier,
+ &verf->verifier,
+ NFS4_VERIFIER_SIZE);
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld status=%d stable=%s "
- "verifier 0x%016llx",
+ "offset=%lld count=%u res=%u status=%d stable=%s "
+ "verifier=%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->status,
+ (long long)__entry->offset, __entry->arg_count,
+ __entry->res_count, __entry->status,
nfs_show_stable(__entry->stable),
- __entry->verifier
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
)
);
+DECLARE_EVENT_CLASS(nfs_page_error_class,
+ TP_PROTO(
+ const struct nfs_page *req,
+ int error
+ ),
+
+ TP_ARGS(req, error),
+
+ TP_STRUCT__entry(
+ __field(const void *, req)
+ __field(pgoff_t, index)
+ __field(unsigned int, offset)
+ __field(unsigned int, pgbase)
+ __field(unsigned int, bytes)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->index = req->wb_index;
+ __entry->offset = req->wb_offset;
+ __entry->pgbase = req->wb_pgbase;
+ __entry->bytes = req->wb_bytes;
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d",
+ __entry->req, __entry->index, __entry->offset,
+ __entry->pgbase, __entry->bytes, __entry->error
+ )
+);
+
+#define DEFINE_NFS_PAGEERR_EVENT(name) \
+ DEFINE_EVENT(nfs_page_error_class, name, \
+ TP_PROTO( \
+ const struct nfs_page *req, \
+ int error \
+ ), \
+ TP_ARGS(req, error))
+
+DEFINE_NFS_PAGEERR_EVENT(nfs_write_error);
+DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error);
+DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error);
+
TRACE_EVENT(nfs_initiate_commit,
TP_PROTO(
const struct nfs_commit_data *data
@@ -995,71 +1118,81 @@ TRACE_EVENT(nfs_initiate_commit,
TP_ARGS(data),
TP_STRUCT__entry(
- __field(loff_t, offset)
- __field(unsigned long, count)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(u32, count)
),
TP_fast_assign(
const struct inode *inode = data->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = data->args.fh ?
+ data->args.fh : &nfsi->fh;
__entry->offset = data->args.offset;
__entry->count = data->args.count;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%lu",
+ "offset=%lld count=%u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->count
+ (long long)__entry->offset, __entry->count
)
);
TRACE_EVENT(nfs_commit_done,
TP_PROTO(
+ const struct rpc_task *task,
const struct nfs_commit_data *data
),
- TP_ARGS(data),
+ TP_ARGS(task, data),
TP_STRUCT__entry(
- __field(int, status)
- __field(loff_t, offset)
- __field(unsigned long long, verifier)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
+ __field(loff_t, offset)
+ __field(int, status)
+ __field(enum nfs3_stable_how, stable)
+ __array(char, verifier, NFS4_VERIFIER_SIZE)
),
TP_fast_assign(
const struct inode *inode = data->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = data->args.fh ?
+ data->args.fh : &nfsi->fh;
+ const struct nfs_writeverf *verf = data->res.verf;
- __entry->status = data->res.op_status;
+ __entry->status = task->tk_status;
__entry->offset = data->args.offset;
- memcpy(&__entry->verifier, &data->verf.verifier,
- sizeof(__entry->verifier));
+ __entry->stable = verf->committed;
+ memcpy(__entry->verifier,
+ &verf->verifier,
+ NFS4_VERIFIER_SIZE);
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
- __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fhandle = nfs_fhandle_hash(fh);
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld status=%d verifier 0x%016llx",
+ "offset=%lld status=%d stable=%s verifier=%s",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- __entry->offset, __entry->status,
- __entry->verifier
+ (long long)__entry->offset, __entry->status,
+ nfs_show_stable(__entry->stable),
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
)
);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cec3070ab577..542ea8dfd1bc 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1425,7 +1425,7 @@ retry:
/* lo ref dropped in pnfs_roc_release() */
layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode);
/* If the creds don't match, we can't compound the layoutreturn */
- if (!layoutreturn || cred != lo->plh_lc_cred)
+ if (!layoutreturn || cred_fscmp(cred, lo->plh_lc_cred) != 0)
goto out_noroc;
roc = layoutreturn;
@@ -1998,8 +1998,6 @@ lookup_again:
trace_pnfs_update_layout(ino, pos, count,
iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_INVALID_OPEN);
- if (status != -EAGAIN)
- goto out_unlock;
spin_unlock(&ino->i_lock);
nfs4_schedule_stateid_recovery(server, ctx->state);
pnfs_clear_first_layoutget(lo);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f8a38065c7e4..0fafdadc9c8d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -79,6 +79,10 @@ enum pnfs_try_status {
PNFS_TRY_AGAIN = 2,
};
+/* error codes for internal use */
+#define NFS4ERR_RESET_TO_MDS 12001
+#define NFS4ERR_RESET_TO_PNFS 12002
+
#ifdef CONFIG_NFS_V4_1
#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
@@ -91,10 +95,6 @@ enum pnfs_try_status {
#define NFS4_DEF_DS_RETRANS 5
#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
-/* error codes for internal use */
-#define NFS4ERR_RESET_TO_MDS 12001
-#define NFS4ERR_RESET_TO_PNFS 12002
-
enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 82af4809b869..8b37e7f8e789 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -31,12 +31,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
/* Fake up some data that will cause nfs_commit_release to retry the writes. */
void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
{
- struct nfs_page *first = nfs_list_entry(data->pages.next);
+ struct nfs_writeverf *verf = data->res.verf;
data->task.tk_status = 0;
- memcpy(&data->verf.verifier, &first->wb_verf,
- sizeof(data->verf.verifier));
- data->verf.verifier.data[0]++; /* ensure verifier mismatch */
+ memset(&verf->verifier, 0, sizeof(verf->verifier));
+ verf->committed = NFS_UNSTABLE;
}
EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0f7288b94633..15c865cc837f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -108,10 +108,15 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
.rpc_resp = fattr,
};
int status;
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
+ task_flags |= RPC_TASK_TIMEOUT;
dprintk("NFS call getattr\n");
nfs_fattr_init(fattr);
- status = rpc_call_sync(server->client, &msg, 0);
+ status = rpc_call_sync(server->client, &msg, task_flags);
dprintk("NFS reply getattr: %d\n", status);
return status;
}
@@ -147,14 +152,14 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
}
static int
-nfs_proc_lookup(struct inode *dir, const struct qstr *name,
+nfs_proc_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fhandle, struct nfs_fattr *fattr,
struct nfs4_label *label)
{
struct nfs_diropargs arg = {
.fh = NFS_FH(dir),
- .name = name->name,
- .len = name->len
+ .name = dentry->d_name.name,
+ .len = dentry->d_name.len
};
struct nfs_diropok res = {
.fh = fhandle,
@@ -166,10 +171,15 @@ nfs_proc_lookup(struct inode *dir, const struct qstr *name,
.rpc_resp = &res,
};
int status;
+ unsigned short task_flags = 0;
+
+ /* Is this is an attribute revalidation, subject to softreval? */
+ if (nfs_lookup_is_soft_revalidate(dentry))
+ task_flags |= RPC_TASK_TIMEOUT;
- dprintk("NFS call lookup %s\n", name->name);
+ dprintk("NFS call lookup %pd2\n", dentry);
nfs_fattr_init(fattr);
- status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
dprintk("NFS reply lookup: %d\n", status);
return status;
}
@@ -710,7 +720,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.file_ops = &nfs_file_operations,
.getroot = nfs_proc_get_root,
.submount = nfs_submount,
- .try_mount = nfs_try_mount,
+ .try_get_tree = nfs_try_get_tree,
.getattr = nfs_proc_getattr,
.setattr = nfs_proc_setattr,
.lookup = nfs_proc_lookup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cfe0b586eadd..34bb9add2302 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -214,7 +214,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
task_setup_data->flags |= swap_flags;
rpc_ops->read_setup(hdr, msg);
- trace_nfs_initiate_read(inode, hdr->io_start, hdr->good_bytes);
+ trace_nfs_initiate_read(hdr);
}
static void
@@ -247,8 +247,7 @@ static int nfs_readpage_done(struct rpc_task *task,
return status;
nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
- trace_nfs_readpage_done(inode, task->tk_status,
- hdr->args.offset, hdr->res.eof);
+ trace_nfs_readpage_done(task, hdr);
if (task->tk_status == -ESTALE) {
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@@ -282,6 +281,8 @@ static void nfs_readpage_retry(struct rpc_task *task,
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
+ resp->count = 0;
+ resp->eof = 0;
rpc_restart_call_prepare(task);
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8d8d04bb9d64..dada09b391c6 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -69,250 +69,6 @@
#include "nfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
-#define NFS_TEXT_DATA 1
-
-#if IS_ENABLED(CONFIG_NFS_V3)
-#define NFS_DEFAULT_VERSION 3
-#else
-#define NFS_DEFAULT_VERSION 2
-#endif
-
-#define NFS_MAX_CONNECTIONS 16
-
-enum {
- /* Mount options that take no arguments */
- Opt_soft, Opt_softerr, Opt_hard,
- Opt_posix, Opt_noposix,
- Opt_cto, Opt_nocto,
- Opt_ac, Opt_noac,
- Opt_lock, Opt_nolock,
- Opt_udp, Opt_tcp, Opt_rdma,
- Opt_acl, Opt_noacl,
- Opt_rdirplus, Opt_nordirplus,
- Opt_sharecache, Opt_nosharecache,
- Opt_resvport, Opt_noresvport,
- Opt_fscache, Opt_nofscache,
- Opt_migration, Opt_nomigration,
-
- /* Mount options that take integer arguments */
- Opt_port,
- Opt_rsize, Opt_wsize, Opt_bsize,
- Opt_timeo, Opt_retrans,
- Opt_acregmin, Opt_acregmax,
- Opt_acdirmin, Opt_acdirmax,
- Opt_actimeo,
- Opt_namelen,
- Opt_mountport,
- Opt_mountvers,
- Opt_minorversion,
-
- /* Mount options that take string arguments */
- Opt_nfsvers,
- Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
- Opt_addr, Opt_mountaddr, Opt_clientaddr,
- Opt_nconnect,
- Opt_lookupcache,
- Opt_fscache_uniq,
- Opt_local_lock,
-
- /* Special mount options */
- Opt_userspace, Opt_deprecated, Opt_sloppy,
-
- Opt_err
-};
-
-static const match_table_t nfs_mount_option_tokens = {
- { Opt_userspace, "bg" },
- { Opt_userspace, "fg" },
- { Opt_userspace, "retry=%s" },
-
- { Opt_sloppy, "sloppy" },
-
- { Opt_soft, "soft" },
- { Opt_softerr, "softerr" },
- { Opt_hard, "hard" },
- { Opt_deprecated, "intr" },
- { Opt_deprecated, "nointr" },
- { Opt_posix, "posix" },
- { Opt_noposix, "noposix" },
- { Opt_cto, "cto" },
- { Opt_nocto, "nocto" },
- { Opt_ac, "ac" },
- { Opt_noac, "noac" },
- { Opt_lock, "lock" },
- { Opt_nolock, "nolock" },
- { Opt_udp, "udp" },
- { Opt_tcp, "tcp" },
- { Opt_rdma, "rdma" },
- { Opt_acl, "acl" },
- { Opt_noacl, "noacl" },
- { Opt_rdirplus, "rdirplus" },
- { Opt_nordirplus, "nordirplus" },
- { Opt_sharecache, "sharecache" },
- { Opt_nosharecache, "nosharecache" },
- { Opt_resvport, "resvport" },
- { Opt_noresvport, "noresvport" },
- { Opt_fscache, "fsc" },
- { Opt_nofscache, "nofsc" },
- { Opt_migration, "migration" },
- { Opt_nomigration, "nomigration" },
-
- { Opt_port, "port=%s" },
- { Opt_rsize, "rsize=%s" },
- { Opt_wsize, "wsize=%s" },
- { Opt_bsize, "bsize=%s" },
- { Opt_timeo, "timeo=%s" },
- { Opt_retrans, "retrans=%s" },
- { Opt_acregmin, "acregmin=%s" },
- { Opt_acregmax, "acregmax=%s" },
- { Opt_acdirmin, "acdirmin=%s" },
- { Opt_acdirmax, "acdirmax=%s" },
- { Opt_actimeo, "actimeo=%s" },
- { Opt_namelen, "namlen=%s" },
- { Opt_mountport, "mountport=%s" },
- { Opt_mountvers, "mountvers=%s" },
- { Opt_minorversion, "minorversion=%s" },
-
- { Opt_nfsvers, "nfsvers=%s" },
- { Opt_nfsvers, "vers=%s" },
-
- { Opt_sec, "sec=%s" },
- { Opt_proto, "proto=%s" },
- { Opt_mountproto, "mountproto=%s" },
- { Opt_addr, "addr=%s" },
- { Opt_clientaddr, "clientaddr=%s" },
- { Opt_mounthost, "mounthost=%s" },
- { Opt_mountaddr, "mountaddr=%s" },
-
- { Opt_nconnect, "nconnect=%s" },
-
- { Opt_lookupcache, "lookupcache=%s" },
- { Opt_fscache_uniq, "fsc=%s" },
- { Opt_local_lock, "local_lock=%s" },
-
- /* The following needs to be listed after all other options */
- { Opt_nfsvers, "v%s" },
-
- { Opt_err, NULL }
-};
-
-enum {
- Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma,
- Opt_xprt_rdma6,
-
- Opt_xprt_err
-};
-
-static const match_table_t nfs_xprt_protocol_tokens = {
- { Opt_xprt_udp, "udp" },
- { Opt_xprt_udp6, "udp6" },
- { Opt_xprt_tcp, "tcp" },
- { Opt_xprt_tcp6, "tcp6" },
- { Opt_xprt_rdma, "rdma" },
- { Opt_xprt_rdma6, "rdma6" },
-
- { Opt_xprt_err, NULL }
-};
-
-enum {
- Opt_sec_none, Opt_sec_sys,
- Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
- Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
- Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
-
- Opt_sec_err
-};
-
-static const match_table_t nfs_secflavor_tokens = {
- { Opt_sec_none, "none" },
- { Opt_sec_none, "null" },
- { Opt_sec_sys, "sys" },
-
- { Opt_sec_krb5, "krb5" },
- { Opt_sec_krb5i, "krb5i" },
- { Opt_sec_krb5p, "krb5p" },
-
- { Opt_sec_lkey, "lkey" },
- { Opt_sec_lkeyi, "lkeyi" },
- { Opt_sec_lkeyp, "lkeyp" },
-
- { Opt_sec_spkm, "spkm3" },
- { Opt_sec_spkmi, "spkm3i" },
- { Opt_sec_spkmp, "spkm3p" },
-
- { Opt_sec_err, NULL }
-};
-
-enum {
- Opt_lookupcache_all, Opt_lookupcache_positive,
- Opt_lookupcache_none,
-
- Opt_lookupcache_err
-};
-
-static match_table_t nfs_lookupcache_tokens = {
- { Opt_lookupcache_all, "all" },
- { Opt_lookupcache_positive, "pos" },
- { Opt_lookupcache_positive, "positive" },
- { Opt_lookupcache_none, "none" },
-
- { Opt_lookupcache_err, NULL }
-};
-
-enum {
- Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
- Opt_local_lock_none,
-
- Opt_local_lock_err
-};
-
-static match_table_t nfs_local_lock_tokens = {
- { Opt_local_lock_all, "all" },
- { Opt_local_lock_flock, "flock" },
- { Opt_local_lock_posix, "posix" },
- { Opt_local_lock_none, "none" },
-
- { Opt_local_lock_err, NULL }
-};
-
-enum {
- Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0,
- Opt_vers_4_1, Opt_vers_4_2,
-
- Opt_vers_err
-};
-
-static match_table_t nfs_vers_tokens = {
- { Opt_vers_2, "2" },
- { Opt_vers_3, "3" },
- { Opt_vers_4, "4" },
- { Opt_vers_4_0, "4.0" },
- { Opt_vers_4_1, "4.1" },
- { Opt_vers_4_2, "4.2" },
-
- { Opt_vers_err, NULL }
-};
-
-static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-
-struct file_system_type nfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs",
- .mount = nfs_fs_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-MODULE_ALIAS_FS("nfs");
-EXPORT_SYMBOL_GPL(nfs_fs_type);
-
-struct file_system_type nfs_xdev_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs",
- .mount = nfs_xdev_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
const struct super_operations nfs_sops = {
.alloc_inode = nfs_alloc_inode,
@@ -326,26 +82,10 @@ const struct super_operations nfs_sops = {
.show_devname = nfs_show_devname,
.show_path = nfs_show_path,
.show_stats = nfs_show_stats,
- .remount_fs = nfs_remount,
};
EXPORT_SYMBOL_GPL(nfs_sops);
#if IS_ENABLED(CONFIG_NFS_V4)
-static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
-static int nfs4_validate_mount_data(void *options,
- struct nfs_parsed_mount_data *args, const char *dev_name);
-
-struct file_system_type nfs4_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs_fs_mount,
- .kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
-};
-MODULE_ALIAS_FS("nfs4");
-MODULE_ALIAS("nfs4");
-EXPORT_SYMBOL_GPL(nfs4_fs_type);
-
static int __init register_nfs4_fs(void)
{
return register_filesystem(&nfs4_fs_type);
@@ -635,6 +375,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", "" },
{ NFS_MOUNT_SOFTERR, ",softerr", "" },
+ { NFS_MOUNT_SOFTREVAL, ",softreval", "" },
{ NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
@@ -931,141 +672,6 @@ void nfs_umount_begin(struct super_block *sb)
}
EXPORT_SYMBOL_GPL(nfs_umount_begin);
-static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
-{
- struct nfs_parsed_mount_data *data;
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (data) {
- data->timeo = NFS_UNSPEC_TIMEO;
- data->retrans = NFS_UNSPEC_RETRANS;
- data->acregmin = NFS_DEF_ACREGMIN;
- data->acregmax = NFS_DEF_ACREGMAX;
- data->acdirmin = NFS_DEF_ACDIRMIN;
- data->acdirmax = NFS_DEF_ACDIRMAX;
- data->mount_server.port = NFS_UNSPEC_PORT;
- data->nfs_server.port = NFS_UNSPEC_PORT;
- data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- data->selected_flavor = RPC_AUTH_MAXFLAVOR;
- data->minorversion = 0;
- data->need_mount = true;
- data->net = current->nsproxy->net_ns;
- data->lsm_opts = NULL;
- }
- return data;
-}
-
-static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data)
-{
- if (data) {
- kfree(data->client_address);
- kfree(data->mount_server.hostname);
- kfree(data->nfs_server.export_path);
- kfree(data->nfs_server.hostname);
- kfree(data->fscache_uniq);
- security_free_mnt_opts(&data->lsm_opts);
- kfree(data);
- }
-}
-
-/*
- * Sanity-check a server address provided by the mount command.
- *
- * Address family must be initialized, and address must not be
- * the ANY address for that family.
- */
-static int nfs_verify_server_address(struct sockaddr *addr)
-{
- switch (addr->sa_family) {
- case AF_INET: {
- struct sockaddr_in *sa = (struct sockaddr_in *)addr;
- return sa->sin_addr.s_addr != htonl(INADDR_ANY);
- }
- case AF_INET6: {
- struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
- return !ipv6_addr_any(sa);
- }
- }
-
- dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
- return 0;
-}
-
-/*
- * Select between a default port value and a user-specified port value.
- * If a zero value is set, then autobind will be used.
- */
-static void nfs_set_port(struct sockaddr *sap, int *port,
- const unsigned short default_port)
-{
- if (*port == NFS_UNSPEC_PORT)
- *port = default_port;
-
- rpc_set_port(sap, *port);
-}
-
-/*
- * Sanity check the NFS transport protocol.
- *
- */
-static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt)
-{
- switch (mnt->nfs_server.protocol) {
- case XPRT_TRANSPORT_UDP:
- case XPRT_TRANSPORT_TCP:
- case XPRT_TRANSPORT_RDMA:
- break;
- default:
- mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- }
-}
-
-/*
- * For text based NFSv2/v3 mounts, the mount protocol transport default
- * settings should depend upon the specified NFS transport.
- */
-static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
-{
- nfs_validate_transport_protocol(mnt);
-
- if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP ||
- mnt->mount_server.protocol == XPRT_TRANSPORT_TCP)
- return;
- switch (mnt->nfs_server.protocol) {
- case XPRT_TRANSPORT_UDP:
- mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
- break;
- case XPRT_TRANSPORT_TCP:
- case XPRT_TRANSPORT_RDMA:
- mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
- }
-}
-
-/*
- * Add 'flavor' to 'auth_info' if not already present.
- * Returns true if 'flavor' ends up in the list, false otherwise
- */
-static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
- rpc_authflavor_t flavor)
-{
- unsigned int i;
- unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
-
- /* make sure this flavor isn't already in the list */
- for (i = 0; i < auth_info->flavor_len; i++) {
- if (flavor == auth_info->flavors[i])
- return true;
- }
-
- if (auth_info->flavor_len + 1 >= max_flavor_len) {
- dfprintk(MOUNT, "NFS: too many sec= flavors\n");
- return false;
- }
-
- auth_info->flavors[auth_info->flavor_len++] = flavor;
- return true;
-}
-
/*
* Return true if 'match' is in auth_info or auth_info is empty.
* Return false otherwise.
@@ -1087,633 +693,13 @@ bool nfs_auth_info_match(const struct nfs_auth_info *auth_info,
EXPORT_SYMBOL_GPL(nfs_auth_info_match);
/*
- * Parse the value of the 'sec=' option.
- */
-static int nfs_parse_security_flavors(char *value,
- struct nfs_parsed_mount_data *mnt)
-{
- substring_t args[MAX_OPT_ARGS];
- rpc_authflavor_t pseudoflavor;
- char *p;
-
- dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
-
- while ((p = strsep(&value, ":")) != NULL) {
- switch (match_token(p, nfs_secflavor_tokens, args)) {
- case Opt_sec_none:
- pseudoflavor = RPC_AUTH_NULL;
- break;
- case Opt_sec_sys:
- pseudoflavor = RPC_AUTH_UNIX;
- break;
- case Opt_sec_krb5:
- pseudoflavor = RPC_AUTH_GSS_KRB5;
- break;
- case Opt_sec_krb5i:
- pseudoflavor = RPC_AUTH_GSS_KRB5I;
- break;
- case Opt_sec_krb5p:
- pseudoflavor = RPC_AUTH_GSS_KRB5P;
- break;
- case Opt_sec_lkey:
- pseudoflavor = RPC_AUTH_GSS_LKEY;
- break;
- case Opt_sec_lkeyi:
- pseudoflavor = RPC_AUTH_GSS_LKEYI;
- break;
- case Opt_sec_lkeyp:
- pseudoflavor = RPC_AUTH_GSS_LKEYP;
- break;
- case Opt_sec_spkm:
- pseudoflavor = RPC_AUTH_GSS_SPKM;
- break;
- case Opt_sec_spkmi:
- pseudoflavor = RPC_AUTH_GSS_SPKMI;
- break;
- case Opt_sec_spkmp:
- pseudoflavor = RPC_AUTH_GSS_SPKMP;
- break;
- default:
- dfprintk(MOUNT,
- "NFS: sec= option '%s' not recognized\n", p);
- return 0;
- }
-
- if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor))
- return 0;
- }
-
- return 1;
-}
-
-static int nfs_parse_version_string(char *string,
- struct nfs_parsed_mount_data *mnt,
- substring_t *args)
-{
- mnt->flags &= ~NFS_MOUNT_VER3;
- switch (match_token(string, nfs_vers_tokens, args)) {
- case Opt_vers_2:
- mnt->version = 2;
- break;
- case Opt_vers_3:
- mnt->flags |= NFS_MOUNT_VER3;
- mnt->version = 3;
- break;
- case Opt_vers_4:
- /* Backward compatibility option. In future,
- * the mount program should always supply
- * a NFSv4 minor version number.
- */
- mnt->version = 4;
- break;
- case Opt_vers_4_0:
- mnt->version = 4;
- mnt->minorversion = 0;
- break;
- case Opt_vers_4_1:
- mnt->version = 4;
- mnt->minorversion = 1;
- break;
- case Opt_vers_4_2:
- mnt->version = 4;
- mnt->minorversion = 2;
- break;
- default:
- return 0;
- }
- return 1;
-}
-
-static int nfs_get_option_str(substring_t args[], char **option)
-{
- kfree(*option);
- *option = match_strdup(args);
- return !*option;
-}
-
-static int nfs_get_option_ul(substring_t args[], unsigned long *option)
-{
- int rc;
- char *string;
-
- string = match_strdup(args);
- if (string == NULL)
- return -ENOMEM;
- rc = kstrtoul(string, 10, option);
- kfree(string);
-
- return rc;
-}
-
-static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
- unsigned long l_bound, unsigned long u_bound)
-{
- int ret;
-
- ret = nfs_get_option_ul(args, option);
- if (ret != 0)
- return ret;
- if (*option < l_bound || *option > u_bound)
- return -ERANGE;
- return 0;
-}
-
-/*
- * Error-check and convert a string of mount options from user space into
- * a data structure. The whole mount string is processed; bad options are
- * skipped as they are encountered. If there were no errors, return 1;
- * otherwise return 0 (zero).
- */
-static int nfs_parse_mount_options(char *raw,
- struct nfs_parsed_mount_data *mnt)
-{
- char *p, *string;
- int rc, sloppy = 0, invalid_option = 0;
- unsigned short protofamily = AF_UNSPEC;
- unsigned short mountfamily = AF_UNSPEC;
-
- if (!raw) {
- dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
- return 1;
- }
- dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
-
- rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts);
- if (rc)
- goto out_security_failure;
-
- while ((p = strsep(&raw, ",")) != NULL) {
- substring_t args[MAX_OPT_ARGS];
- unsigned long option;
- int token;
-
- if (!*p)
- continue;
-
- dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p);
-
- token = match_token(p, nfs_mount_option_tokens, args);
- switch (token) {
-
- /*
- * boolean options: foo/nofoo
- */
- case Opt_soft:
- mnt->flags |= NFS_MOUNT_SOFT;
- mnt->flags &= ~NFS_MOUNT_SOFTERR;
- break;
- case Opt_softerr:
- mnt->flags |= NFS_MOUNT_SOFTERR;
- mnt->flags &= ~NFS_MOUNT_SOFT;
- break;
- case Opt_hard:
- mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR);
- break;
- case Opt_posix:
- mnt->flags |= NFS_MOUNT_POSIX;
- break;
- case Opt_noposix:
- mnt->flags &= ~NFS_MOUNT_POSIX;
- break;
- case Opt_cto:
- mnt->flags &= ~NFS_MOUNT_NOCTO;
- break;
- case Opt_nocto:
- mnt->flags |= NFS_MOUNT_NOCTO;
- break;
- case Opt_ac:
- mnt->flags &= ~NFS_MOUNT_NOAC;
- break;
- case Opt_noac:
- mnt->flags |= NFS_MOUNT_NOAC;
- break;
- case Opt_lock:
- mnt->flags &= ~NFS_MOUNT_NONLM;
- mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
- NFS_MOUNT_LOCAL_FCNTL);
- break;
- case Opt_nolock:
- mnt->flags |= NFS_MOUNT_NONLM;
- mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
- NFS_MOUNT_LOCAL_FCNTL);
- break;
- case Opt_udp:
- mnt->flags &= ~NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
- break;
- case Opt_tcp:
- mnt->flags |= NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- break;
- case Opt_rdma:
- mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
- mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
- xprt_load_transport(p);
- break;
- case Opt_acl:
- mnt->flags &= ~NFS_MOUNT_NOACL;
- break;
- case Opt_noacl:
- mnt->flags |= NFS_MOUNT_NOACL;
- break;
- case Opt_rdirplus:
- mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
- break;
- case Opt_nordirplus:
- mnt->flags |= NFS_MOUNT_NORDIRPLUS;
- break;
- case Opt_sharecache:
- mnt->flags &= ~NFS_MOUNT_UNSHARED;
- break;
- case Opt_nosharecache:
- mnt->flags |= NFS_MOUNT_UNSHARED;
- break;
- case Opt_resvport:
- mnt->flags &= ~NFS_MOUNT_NORESVPORT;
- break;
- case Opt_noresvport:
- mnt->flags |= NFS_MOUNT_NORESVPORT;
- break;
- case Opt_fscache:
- mnt->options |= NFS_OPTION_FSCACHE;
- kfree(mnt->fscache_uniq);
- mnt->fscache_uniq = NULL;
- break;
- case Opt_nofscache:
- mnt->options &= ~NFS_OPTION_FSCACHE;
- kfree(mnt->fscache_uniq);
- mnt->fscache_uniq = NULL;
- break;
- case Opt_migration:
- mnt->options |= NFS_OPTION_MIGRATION;
- break;
- case Opt_nomigration:
- mnt->options &= ~NFS_OPTION_MIGRATION;
- break;
-
- /*
- * options that take numeric values
- */
- case Opt_port:
- if (nfs_get_option_ul(args, &option) ||
- option > USHRT_MAX)
- goto out_invalid_value;
- mnt->nfs_server.port = option;
- break;
- case Opt_rsize:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->rsize = option;
- break;
- case Opt_wsize:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->wsize = option;
- break;
- case Opt_bsize:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->bsize = option;
- break;
- case Opt_timeo:
- if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX))
- goto out_invalid_value;
- mnt->timeo = option;
- break;
- case Opt_retrans:
- if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX))
- goto out_invalid_value;
- mnt->retrans = option;
- break;
- case Opt_acregmin:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acregmin = option;
- break;
- case Opt_acregmax:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acregmax = option;
- break;
- case Opt_acdirmin:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acdirmin = option;
- break;
- case Opt_acdirmax:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acdirmax = option;
- break;
- case Opt_actimeo:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->acregmin = mnt->acregmax =
- mnt->acdirmin = mnt->acdirmax = option;
- break;
- case Opt_namelen:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- mnt->namlen = option;
- break;
- case Opt_mountport:
- if (nfs_get_option_ul(args, &option) ||
- option > USHRT_MAX)
- goto out_invalid_value;
- mnt->mount_server.port = option;
- break;
- case Opt_mountvers:
- if (nfs_get_option_ul(args, &option) ||
- option < NFS_MNT_VERSION ||
- option > NFS_MNT3_VERSION)
- goto out_invalid_value;
- mnt->mount_server.version = option;
- break;
- case Opt_minorversion:
- if (nfs_get_option_ul(args, &option))
- goto out_invalid_value;
- if (option > NFS4_MAX_MINOR_VERSION)
- goto out_invalid_value;
- mnt->minorversion = option;
- break;
-
- /*
- * options that take text values
- */
- case Opt_nfsvers:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = nfs_parse_version_string(string, mnt, args);
- kfree(string);
- if (!rc)
- goto out_invalid_value;
- break;
- case Opt_sec:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- rc = nfs_parse_security_flavors(string, mnt);
- kfree(string);
- if (!rc) {
- dfprintk(MOUNT, "NFS: unrecognized "
- "security flavor\n");
- return 0;
- }
- break;
- case Opt_proto:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- token = match_token(string,
- nfs_xprt_protocol_tokens, args);
-
- protofamily = AF_INET;
- switch (token) {
- case Opt_xprt_udp6:
- protofamily = AF_INET6;
- /* fall through */
- case Opt_xprt_udp:
- mnt->flags &= ~NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
- break;
- case Opt_xprt_tcp6:
- protofamily = AF_INET6;
- /* fall through */
- case Opt_xprt_tcp:
- mnt->flags |= NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- break;
- case Opt_xprt_rdma6:
- protofamily = AF_INET6;
- /* fall through */
- case Opt_xprt_rdma:
- /* vector side protocols to TCP */
- mnt->flags |= NFS_MOUNT_TCP;
- mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
- xprt_load_transport(string);
- break;
- default:
- dfprintk(MOUNT, "NFS: unrecognized "
- "transport protocol\n");
- kfree(string);
- return 0;
- }
- kfree(string);
- break;
- case Opt_mountproto:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- token = match_token(string,
- nfs_xprt_protocol_tokens, args);
- kfree(string);
-
- mountfamily = AF_INET;
- switch (token) {
- case Opt_xprt_udp6:
- mountfamily = AF_INET6;
- /* fall through */
- case Opt_xprt_udp:
- mnt->mount_server.protocol = XPRT_TRANSPORT_UDP;
- break;
- case Opt_xprt_tcp6:
- mountfamily = AF_INET6;
- /* fall through */
- case Opt_xprt_tcp:
- mnt->mount_server.protocol = XPRT_TRANSPORT_TCP;
- break;
- case Opt_xprt_rdma: /* not used for side protocols */
- default:
- dfprintk(MOUNT, "NFS: unrecognized "
- "transport protocol\n");
- return 0;
- }
- break;
- case Opt_addr:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- mnt->nfs_server.addrlen =
- rpc_pton(mnt->net, string, strlen(string),
- (struct sockaddr *)
- &mnt->nfs_server.address,
- sizeof(mnt->nfs_server.address));
- kfree(string);
- if (mnt->nfs_server.addrlen == 0)
- goto out_invalid_address;
- break;
- case Opt_clientaddr:
- if (nfs_get_option_str(args, &mnt->client_address))
- goto out_nomem;
- break;
- case Opt_mounthost:
- if (nfs_get_option_str(args,
- &mnt->mount_server.hostname))
- goto out_nomem;
- break;
- case Opt_mountaddr:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- mnt->mount_server.addrlen =
- rpc_pton(mnt->net, string, strlen(string),
- (struct sockaddr *)
- &mnt->mount_server.address,
- sizeof(mnt->mount_server.address));
- kfree(string);
- if (mnt->mount_server.addrlen == 0)
- goto out_invalid_address;
- break;
- case Opt_nconnect:
- if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS))
- goto out_invalid_value;
- mnt->nfs_server.nconnect = option;
- break;
- case Opt_lookupcache:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- token = match_token(string,
- nfs_lookupcache_tokens, args);
- kfree(string);
- switch (token) {
- case Opt_lookupcache_all:
- mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
- break;
- case Opt_lookupcache_positive:
- mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
- mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
- break;
- case Opt_lookupcache_none:
- mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
- break;
- default:
- dfprintk(MOUNT, "NFS: invalid "
- "lookupcache argument\n");
- return 0;
- }
- break;
- case Opt_fscache_uniq:
- if (nfs_get_option_str(args, &mnt->fscache_uniq))
- goto out_nomem;
- mnt->options |= NFS_OPTION_FSCACHE;
- break;
- case Opt_local_lock:
- string = match_strdup(args);
- if (string == NULL)
- goto out_nomem;
- token = match_token(string, nfs_local_lock_tokens,
- args);
- kfree(string);
- switch (token) {
- case Opt_local_lock_all:
- mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
- NFS_MOUNT_LOCAL_FCNTL);
- break;
- case Opt_local_lock_flock:
- mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
- break;
- case Opt_local_lock_posix:
- mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
- break;
- case Opt_local_lock_none:
- mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
- NFS_MOUNT_LOCAL_FCNTL);
- break;
- default:
- dfprintk(MOUNT, "NFS: invalid "
- "local_lock argument\n");
- return 0;
- }
- break;
-
- /*
- * Special options
- */
- case Opt_sloppy:
- sloppy = 1;
- dfprintk(MOUNT, "NFS: relaxing parsing rules\n");
- break;
- case Opt_userspace:
- case Opt_deprecated:
- dfprintk(MOUNT, "NFS: ignoring mount option "
- "'%s'\n", p);
- break;
-
- default:
- invalid_option = 1;
- dfprintk(MOUNT, "NFS: unrecognized mount option "
- "'%s'\n", p);
- }
- }
-
- if (!sloppy && invalid_option)
- return 0;
-
- if (mnt->minorversion && mnt->version != 4)
- goto out_minorversion_mismatch;
-
- if (mnt->options & NFS_OPTION_MIGRATION &&
- (mnt->version != 4 || mnt->minorversion != 0))
- goto out_migration_misuse;
-
- /*
- * verify that any proto=/mountproto= options match the address
- * families in the addr=/mountaddr= options.
- */
- if (protofamily != AF_UNSPEC &&
- protofamily != mnt->nfs_server.address.ss_family)
- goto out_proto_mismatch;
-
- if (mountfamily != AF_UNSPEC) {
- if (mnt->mount_server.addrlen) {
- if (mountfamily != mnt->mount_server.address.ss_family)
- goto out_mountproto_mismatch;
- } else {
- if (mountfamily != mnt->nfs_server.address.ss_family)
- goto out_mountproto_mismatch;
- }
- }
-
- return 1;
-
-out_mountproto_mismatch:
- printk(KERN_INFO "NFS: mount server address does not match mountproto= "
- "option\n");
- return 0;
-out_proto_mismatch:
- printk(KERN_INFO "NFS: server address does not match proto= option\n");
- return 0;
-out_invalid_address:
- printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
- return 0;
-out_invalid_value:
- printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
- return 0;
-out_minorversion_mismatch:
- printk(KERN_INFO "NFS: mount option vers=%u does not support "
- "minorversion=%u\n", mnt->version, mnt->minorversion);
- return 0;
-out_migration_misuse:
- printk(KERN_INFO
- "NFS: 'migration' not supported for this NFS version\n");
- return 0;
-out_nomem:
- printk(KERN_INFO "NFS: not enough memory to parse option\n");
- return 0;
-out_security_failure:
- printk(KERN_INFO "NFS: security options invalid: %d\n", rc);
- return 0;
-}
-
-/*
- * Ensure that a specified authtype in args->auth_info is supported by
- * the server. Returns 0 and sets args->selected_flavor if it's ok, and
+ * Ensure that a specified authtype in ctx->auth_info is supported by
+ * the server. Returns 0 and sets ctx->selected_flavor if it's ok, and
* -EACCES if not.
*/
-static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
- rpc_authflavor_t *server_authlist, unsigned int count)
+static int nfs_verify_authflavors(struct nfs_fs_context *ctx,
+ rpc_authflavor_t *server_authlist,
+ unsigned int count)
{
rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR;
bool found_auth_null = false;
@@ -1734,7 +720,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
for (i = 0; i < count; i++) {
flavor = server_authlist[i];
- if (nfs_auth_info_match(&args->auth_info, flavor))
+ if (nfs_auth_info_match(&ctx->auth_info, flavor))
goto out;
if (flavor == RPC_AUTH_NULL)
@@ -1742,7 +728,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
}
if (found_auth_null) {
- flavor = args->auth_info.flavors[0];
+ flavor = ctx->auth_info.flavors[0];
goto out;
}
@@ -1751,8 +737,8 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args,
return -EACCES;
out:
- args->selected_flavor = flavor;
- dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor);
+ ctx->selected_flavor = flavor;
+ dfprintk(MOUNT, "NFS: using auth flavor %u\n", ctx->selected_flavor);
return 0;
}
@@ -1760,50 +746,51 @@ out:
* Use the remote server's MOUNT service to request the NFS file handle
* corresponding to the provided path.
*/
-static int nfs_request_mount(struct nfs_parsed_mount_data *args,
+static int nfs_request_mount(struct fs_context *fc,
struct nfs_fh *root_fh,
rpc_authflavor_t *server_authlist,
unsigned int *server_authlist_len)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_mount_request request = {
.sap = (struct sockaddr *)
- &args->mount_server.address,
- .dirpath = args->nfs_server.export_path,
- .protocol = args->mount_server.protocol,
+ &ctx->mount_server.address,
+ .dirpath = ctx->nfs_server.export_path,
+ .protocol = ctx->mount_server.protocol,
.fh = root_fh,
- .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
+ .noresvport = ctx->flags & NFS_MOUNT_NORESVPORT,
.auth_flav_len = server_authlist_len,
.auth_flavs = server_authlist,
- .net = args->net,
+ .net = fc->net_ns,
};
int status;
- if (args->mount_server.version == 0) {
- switch (args->version) {
+ if (ctx->mount_server.version == 0) {
+ switch (ctx->version) {
default:
- args->mount_server.version = NFS_MNT3_VERSION;
+ ctx->mount_server.version = NFS_MNT3_VERSION;
break;
case 2:
- args->mount_server.version = NFS_MNT_VERSION;
+ ctx->mount_server.version = NFS_MNT_VERSION;
}
}
- request.version = args->mount_server.version;
+ request.version = ctx->mount_server.version;
- if (args->mount_server.hostname)
- request.hostname = args->mount_server.hostname;
+ if (ctx->mount_server.hostname)
+ request.hostname = ctx->mount_server.hostname;
else
- request.hostname = args->nfs_server.hostname;
+ request.hostname = ctx->nfs_server.hostname;
/*
* Construct the mount server's address.
*/
- if (args->mount_server.address.ss_family == AF_UNSPEC) {
- memcpy(request.sap, &args->nfs_server.address,
- args->nfs_server.addrlen);
- args->mount_server.addrlen = args->nfs_server.addrlen;
+ if (ctx->mount_server.address.sa_family == AF_UNSPEC) {
+ memcpy(request.sap, &ctx->nfs_server.address,
+ ctx->nfs_server.addrlen);
+ ctx->mount_server.addrlen = ctx->nfs_server.addrlen;
}
- request.salen = args->mount_server.addrlen;
- nfs_set_port(request.sap, &args->mount_server.port, 0);
+ request.salen = ctx->mount_server.addrlen;
+ nfs_set_port(request.sap, &ctx->mount_server.port, 0);
/*
* Now ask the mount server to map our export path
@@ -1819,20 +806,18 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args,
return 0;
}
-static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+static struct nfs_server *nfs_try_mount_request(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
int status;
unsigned int i;
bool tried_auth_unix = false;
bool auth_null_in_list = false;
struct nfs_server *server = ERR_PTR(-EACCES);
- struct nfs_parsed_mount_data *args = mount_info->parsed;
rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS];
unsigned int authlist_len = ARRAY_SIZE(authlist);
- status = nfs_request_mount(args, mount_info->mntfh, authlist,
- &authlist_len);
+ status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len);
if (status)
return ERR_PTR(status);
@@ -1840,13 +825,13 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
* Was a sec= authflavor specified in the options? First, verify
* whether the server supports it, and then just try to use it if so.
*/
- if (args->auth_info.flavor_len > 0) {
- status = nfs_verify_authflavors(args, authlist, authlist_len);
+ if (ctx->auth_info.flavor_len > 0) {
+ status = nfs_verify_authflavors(ctx, authlist, authlist_len);
dfprintk(MOUNT, "NFS: using auth flavor %u\n",
- args->selected_flavor);
+ ctx->selected_flavor);
if (status)
return ERR_PTR(status);
- return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ return ctx->nfs_mod->rpc_ops->create_server(fc);
}
/*
@@ -1872,8 +857,8 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
/* Fallthrough */
}
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
- args->selected_flavor = flavor;
- server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ ctx->selected_flavor = flavor;
+ server = ctx->nfs_mod->rpc_ops->create_server(fc);
if (!IS_ERR(server))
return server;
}
@@ -1888,348 +873,23 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf
/* Last chance! Try AUTH_UNIX */
dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX);
- args->selected_flavor = RPC_AUTH_UNIX;
- return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
+ ctx->selected_flavor = RPC_AUTH_UNIX;
+ return ctx->nfs_mod->rpc_ops->create_server(fc);
}
-struct dentry *nfs_try_mount(int flags, const char *dev_name,
- struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+int nfs_try_get_tree(struct fs_context *fc)
{
- struct nfs_server *server;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
- if (mount_info->parsed->need_mount)
- server = nfs_try_mount_request(mount_info, nfs_mod);
+ if (ctx->need_mount)
+ ctx->server = nfs_try_mount_request(fc);
else
- server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
-
- if (IS_ERR(server))
- return ERR_CAST(server);
-
- return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod);
-}
-EXPORT_SYMBOL_GPL(nfs_try_mount);
-
-/*
- * Split "dev_name" into "hostname:export_path".
- *
- * The leftmost colon demarks the split between the server's hostname
- * and the export path. If the hostname starts with a left square
- * bracket, then it may contain colons.
- *
- * Note: caller frees hostname and export path, even on error.
- */
-static int nfs_parse_devname(const char *dev_name,
- char **hostname, size_t maxnamlen,
- char **export_path, size_t maxpathlen)
-{
- size_t len;
- char *end;
-
- if (unlikely(!dev_name || !*dev_name)) {
- dfprintk(MOUNT, "NFS: device name not specified\n");
- return -EINVAL;
- }
-
- /* Is the host name protected with square brakcets? */
- if (*dev_name == '[') {
- end = strchr(++dev_name, ']');
- if (end == NULL || end[1] != ':')
- goto out_bad_devname;
-
- len = end - dev_name;
- end++;
- } else {
- char *comma;
-
- end = strchr(dev_name, ':');
- if (end == NULL)
- goto out_bad_devname;
- len = end - dev_name;
-
- /* kill possible hostname list: not supported */
- comma = strchr(dev_name, ',');
- if (comma != NULL && comma < end)
- len = comma - dev_name;
- }
-
- if (len > maxnamlen)
- goto out_hostname;
-
- /* N.B. caller will free nfs_server.hostname in all cases */
- *hostname = kstrndup(dev_name, len, GFP_KERNEL);
- if (*hostname == NULL)
- goto out_nomem;
- len = strlen(++end);
- if (len > maxpathlen)
- goto out_path;
- *export_path = kstrndup(end, len, GFP_KERNEL);
- if (!*export_path)
- goto out_nomem;
-
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
- return 0;
-
-out_bad_devname:
- dfprintk(MOUNT, "NFS: device name not in host:path format\n");
- return -EINVAL;
+ ctx->server = ctx->nfs_mod->rpc_ops->create_server(fc);
-out_nomem:
- dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
- return -ENOMEM;
-
-out_hostname:
- dfprintk(MOUNT, "NFS: server hostname too long\n");
- return -ENAMETOOLONG;
-
-out_path:
- dfprintk(MOUNT, "NFS: export pathname too long\n");
- return -ENAMETOOLONG;
+ return nfs_get_tree_common(fc);
}
+EXPORT_SYMBOL_GPL(nfs_try_get_tree);
-/*
- * Validate the NFS2/NFS3 mount data
- * - fills in the mount root filehandle
- *
- * For option strings, user space handles the following behaviors:
- *
- * + DNS: mapping server host name to IP address ("addr=" option)
- *
- * + failure mode: how to behave if a mount request can't be handled
- * immediately ("fg/bg" option)
- *
- * + retry: how often to retry a mount request ("retry=" option)
- *
- * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
- * mountproto=tcp after mountproto=udp, and so on
- */
-static int nfs23_validate_mount_data(void *options,
- struct nfs_parsed_mount_data *args,
- struct nfs_fh *mntfh,
- const char *dev_name)
-{
- struct nfs_mount_data *data = (struct nfs_mount_data *)options;
- struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
- int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
-
- if (data == NULL)
- goto out_no_data;
-
- args->version = NFS_DEFAULT_VERSION;
- switch (data->version) {
- case 1:
- data->namlen = 0; /* fall through */
- case 2:
- data->bsize = 0; /* fall through */
- case 3:
- if (data->flags & NFS_MOUNT_VER3)
- goto out_no_v3;
- data->root.size = NFS2_FHSIZE;
- memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
- /* Turn off security negotiation */
- extra_flags |= NFS_MOUNT_SECFLAVOUR;
- /* fall through */
- case 4:
- if (data->flags & NFS_MOUNT_SECFLAVOUR)
- goto out_no_sec;
- /* fall through */
- case 5:
- memset(data->context, 0, sizeof(data->context));
- /* fall through */
- case 6:
- if (data->flags & NFS_MOUNT_VER3) {
- if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
- goto out_invalid_fh;
- mntfh->size = data->root.size;
- args->version = 3;
- } else {
- mntfh->size = NFS2_FHSIZE;
- args->version = 2;
- }
-
-
- memcpy(mntfh->data, data->root.data, mntfh->size);
- if (mntfh->size < sizeof(mntfh->data))
- memset(mntfh->data + mntfh->size, 0,
- sizeof(mntfh->data) - mntfh->size);
-
- /*
- * Translate to nfs_parsed_mount_data, which nfs_fill_super
- * can deal with.
- */
- args->flags = data->flags & NFS_MOUNT_FLAGMASK;
- args->flags |= extra_flags;
- args->rsize = data->rsize;
- args->wsize = data->wsize;
- args->timeo = data->timeo;
- args->retrans = data->retrans;
- args->acregmin = data->acregmin;
- args->acregmax = data->acregmax;
- args->acdirmin = data->acdirmin;
- args->acdirmax = data->acdirmax;
- args->need_mount = false;
-
- memcpy(sap, &data->addr, sizeof(data->addr));
- args->nfs_server.addrlen = sizeof(data->addr);
- args->nfs_server.port = ntohs(data->addr.sin_port);
- if (sap->sa_family != AF_INET ||
- !nfs_verify_server_address(sap))
- goto out_no_address;
-
- if (!(data->flags & NFS_MOUNT_TCP))
- args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
- /* N.B. caller will free nfs_server.hostname in all cases */
- args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
- args->namlen = data->namlen;
- args->bsize = data->bsize;
-
- if (data->flags & NFS_MOUNT_SECFLAVOUR)
- args->selected_flavor = data->pseudoflavor;
- else
- args->selected_flavor = RPC_AUTH_UNIX;
- if (!args->nfs_server.hostname)
- goto out_nomem;
-
- if (!(data->flags & NFS_MOUNT_NONLM))
- args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
- NFS_MOUNT_LOCAL_FCNTL);
- else
- args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
- NFS_MOUNT_LOCAL_FCNTL);
- /*
- * The legacy version 6 binary mount data from userspace has a
- * field used only to transport selinux information into the
- * the kernel. To continue to support that functionality we
- * have a touch of selinux knowledge here in the NFS code. The
- * userspace code converted context=blah to just blah so we are
- * converting back to the full string selinux understands.
- */
- if (data->context[0]){
-#ifdef CONFIG_SECURITY_SELINUX
- int rc;
- data->context[NFS_MAX_CONTEXT_LEN] = '\0';
- rc = security_add_mnt_opt("context", data->context,
- strlen(data->context), &args->lsm_opts);
- if (rc)
- return rc;
-#else
- return -EINVAL;
-#endif
- }
-
- break;
- default:
- return NFS_TEXT_DATA;
- }
-
- return 0;
-
-out_no_data:
- dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
- return -EINVAL;
-
-out_no_v3:
- dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
- data->version);
- return -EINVAL;
-
-out_no_sec:
- dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
- return -EINVAL;
-
-out_nomem:
- dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
- return -ENOMEM;
-
-out_no_address:
- dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
- return -EINVAL;
-
-out_invalid_fh:
- dfprintk(MOUNT, "NFS: invalid root filehandle\n");
- return -EINVAL;
-}
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-static int nfs_validate_mount_data(struct file_system_type *fs_type,
- void *options,
- struct nfs_parsed_mount_data *args,
- struct nfs_fh *mntfh,
- const char *dev_name)
-{
- if (fs_type == &nfs_fs_type)
- return nfs23_validate_mount_data(options, args, mntfh, dev_name);
- return nfs4_validate_mount_data(options, args, dev_name);
-}
-#else
-static int nfs_validate_mount_data(struct file_system_type *fs_type,
- void *options,
- struct nfs_parsed_mount_data *args,
- struct nfs_fh *mntfh,
- const char *dev_name)
-{
- return nfs23_validate_mount_data(options, args, mntfh, dev_name);
-}
-#endif
-
-static int nfs_validate_text_mount_data(void *options,
- struct nfs_parsed_mount_data *args,
- const char *dev_name)
-{
- int port = 0;
- int max_namelen = PAGE_SIZE;
- int max_pathlen = NFS_MAXPATHLEN;
- struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
-
- if (nfs_parse_mount_options((char *)options, args) == 0)
- return -EINVAL;
-
- if (!nfs_verify_server_address(sap))
- goto out_no_address;
-
- if (args->version == 4) {
-#if IS_ENABLED(CONFIG_NFS_V4)
- if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
- port = NFS_RDMA_PORT;
- else
- port = NFS_PORT;
- max_namelen = NFS4_MAXNAMLEN;
- max_pathlen = NFS4_MAXPATHLEN;
- nfs_validate_transport_protocol(args);
- if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
- nfs4_validate_mount_flags(args);
-#else
- goto out_v4_not_compiled;
-#endif /* CONFIG_NFS_V4 */
- } else {
- nfs_set_mount_transport_protocol(args);
- if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
- port = NFS_RDMA_PORT;
- }
-
- nfs_set_port(sap, &args->nfs_server.port, port);
-
- return nfs_parse_devname(dev_name,
- &args->nfs_server.hostname,
- max_namelen,
- &args->nfs_server.export_path,
- max_pathlen);
-
-#if !IS_ENABLED(CONFIG_NFS_V4)
-out_v4_not_compiled:
- dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
- return -EPROTONOSUPPORT;
-#else
-out_invalid_transport_udp:
- dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
- return -EINVAL;
-#endif /* !CONFIG_NFS_V4 */
-
-out_no_address:
- dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
- return -EINVAL;
-}
#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
| NFS_MOUNT_SECURE \
@@ -2246,39 +906,35 @@ out_no_address:
static int
nfs_compare_remount_data(struct nfs_server *nfss,
- struct nfs_parsed_mount_data *data)
-{
- if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
- data->rsize != nfss->rsize ||
- data->wsize != nfss->wsize ||
- data->version != nfss->nfs_client->rpc_ops->version ||
- data->minorversion != nfss->nfs_client->cl_minorversion ||
- data->retrans != nfss->client->cl_timeout->to_retries ||
- !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) ||
- data->acregmin != nfss->acregmin / HZ ||
- data->acregmax != nfss->acregmax / HZ ||
- data->acdirmin != nfss->acdirmin / HZ ||
- data->acdirmax != nfss->acdirmax / HZ ||
- data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
- (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
- data->nfs_server.port != nfss->port ||
- data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
- !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address,
+ struct nfs_fs_context *ctx)
+{
+ if ((ctx->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
+ ctx->rsize != nfss->rsize ||
+ ctx->wsize != nfss->wsize ||
+ ctx->version != nfss->nfs_client->rpc_ops->version ||
+ ctx->minorversion != nfss->nfs_client->cl_minorversion ||
+ ctx->retrans != nfss->client->cl_timeout->to_retries ||
+ !nfs_auth_info_match(&ctx->auth_info, nfss->client->cl_auth->au_flavor) ||
+ ctx->acregmin != nfss->acregmin / HZ ||
+ ctx->acregmax != nfss->acregmax / HZ ||
+ ctx->acdirmin != nfss->acdirmin / HZ ||
+ ctx->acdirmax != nfss->acdirmax / HZ ||
+ ctx->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
+ (ctx->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
+ ctx->nfs_server.port != nfss->port ||
+ ctx->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
+ !rpc_cmp_addr((struct sockaddr *)&ctx->nfs_server.address,
(struct sockaddr *)&nfss->nfs_client->cl_addr))
return -EINVAL;
return 0;
}
-int
-nfs_remount(struct super_block *sb, int *flags, char *raw_data)
+int nfs_reconfigure(struct fs_context *fc)
{
- int error;
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
+ struct super_block *sb = fc->root->d_sb;
struct nfs_server *nfss = sb->s_fs_info;
- struct nfs_parsed_mount_data *data;
- struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data;
- struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data;
- u32 nfsvers = nfss->nfs_client->rpc_ops->version;
sync_filesystem(sb);
@@ -2288,92 +944,38 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
* ones were explicitly specified. Fall back to legacy behavior and
* just return success.
*/
- if ((nfsvers == 4 && (!options4 || options4->version == 1)) ||
- (nfsvers <= 3 && (!options || (options->version >= 1 &&
- options->version <= 6))))
+ if (ctx->skip_reconfig_option_check)
return 0;
- data = nfs_alloc_parsed_mount_data();
- if (data == NULL)
- return -ENOMEM;
-
- /* fill out struct with values from existing mount */
- data->flags = nfss->flags;
- data->rsize = nfss->rsize;
- data->wsize = nfss->wsize;
- data->retrans = nfss->client->cl_timeout->to_retries;
- data->selected_flavor = nfss->client->cl_auth->au_flavor;
- data->acregmin = nfss->acregmin / HZ;
- data->acregmax = nfss->acregmax / HZ;
- data->acdirmin = nfss->acdirmin / HZ;
- data->acdirmax = nfss->acdirmax / HZ;
- data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ;
- data->nfs_server.port = nfss->port;
- data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
- data->version = nfsvers;
- data->minorversion = nfss->nfs_client->cl_minorversion;
- data->net = current->nsproxy->net_ns;
- memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
- data->nfs_server.addrlen);
-
- /* overwrite those values with any that were specified */
- error = -EINVAL;
- if (!nfs_parse_mount_options((char *)options, data))
- goto out;
-
/*
* noac is a special case. It implies -o sync, but that's not
- * necessarily reflected in the mtab options. do_remount_sb
+ * necessarily reflected in the mtab options. reconfigure_super
* will clear SB_SYNCHRONOUS if -o sync wasn't specified in the
* remount options, so we have to explicitly reset it.
*/
- if (data->flags & NFS_MOUNT_NOAC)
- *flags |= SB_SYNCHRONOUS;
+ if (ctx->flags & NFS_MOUNT_NOAC) {
+ fc->sb_flags |= SB_SYNCHRONOUS;
+ fc->sb_flags_mask |= SB_SYNCHRONOUS;
+ }
/* compare new mount options with old ones */
- error = nfs_compare_remount_data(nfss, data);
- if (!error)
- error = security_sb_remount(sb, data->lsm_opts);
-out:
- nfs_free_parsed_mount_data(data);
- return error;
-}
-EXPORT_SYMBOL_GPL(nfs_remount);
-
-/*
- * Initialise the common bits of the superblock
- */
-static void nfs_initialise_sb(struct super_block *sb)
-{
- struct nfs_server *server = NFS_SB(sb);
-
- sb->s_magic = NFS_SUPER_MAGIC;
-
- /* We probably want something more informative here */
- snprintf(sb->s_id, sizeof(sb->s_id),
- "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
- if (sb->s_blocksize == 0)
- sb->s_blocksize = nfs_block_bits(server->wsize,
- &sb->s_blocksize_bits);
-
- nfs_super_set_maxbytes(sb, server->maxfilesize);
+ return nfs_compare_remount_data(nfss, ctx);
}
+EXPORT_SYMBOL_GPL(nfs_reconfigure);
/*
- * Finish setting up an NFS2/3 superblock
+ * Finish setting up an NFS superblock
*/
-void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
+static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
{
- struct nfs_parsed_mount_data *data = mount_info->parsed;
struct nfs_server *server = NFS_SB(sb);
sb->s_blocksize_bits = 0;
sb->s_blocksize = 0;
sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr;
sb->s_op = server->nfs_client->cl_nfs_mod->sops;
- if (data && data->bsize)
- sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
+ if (ctx && ctx->bsize)
+ sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits);
if (server->nfs_client->rpc_ops->version != 2) {
/* The VFS shouldn't apply the umask to mode bits. We will do
@@ -2393,53 +995,27 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
sb->s_time_max = S64_MAX;
}
- nfs_initialise_sb(sb);
-}
-EXPORT_SYMBOL_GPL(nfs_fill_super);
-
-/*
- * Finish setting up a cloned NFS2/3/4 superblock
- */
-static void nfs_clone_super(struct super_block *sb,
- struct nfs_mount_info *mount_info)
-{
- const struct super_block *old_sb = mount_info->cloned->sb;
- struct nfs_server *server = NFS_SB(sb);
-
- sb->s_blocksize_bits = old_sb->s_blocksize_bits;
- sb->s_blocksize = old_sb->s_blocksize;
- sb->s_maxbytes = old_sb->s_maxbytes;
- sb->s_xattr = old_sb->s_xattr;
- sb->s_op = old_sb->s_op;
- sb->s_export_op = old_sb->s_export_op;
+ sb->s_magic = NFS_SUPER_MAGIC;
- if (server->nfs_client->rpc_ops->version != 2) {
- /* The VFS shouldn't apply the umask to mode bits. We will do
- * so ourselves when necessary.
- */
- sb->s_flags |= SB_POSIXACL;
- sb->s_time_gran = 1;
- } else
- sb->s_time_gran = 1000;
+ /* We probably want something more informative here */
+ snprintf(sb->s_id, sizeof(sb->s_id),
+ "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev));
- if (server->nfs_client->rpc_ops->version != 4) {
- sb->s_time_min = 0;
- sb->s_time_max = U32_MAX;
- } else {
- sb->s_time_min = S64_MIN;
- sb->s_time_max = S64_MAX;
- }
+ if (sb->s_blocksize == 0)
+ sb->s_blocksize = nfs_block_bits(server->wsize,
+ &sb->s_blocksize_bits);
- nfs_initialise_sb(sb);
+ nfs_super_set_maxbytes(sb, server->maxfilesize);
}
-static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
+static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b,
+ const struct fs_context *fc)
{
const struct nfs_server *a = s->s_fs_info;
const struct rpc_clnt *clnt_a = a->client;
const struct rpc_clnt *clnt_b = b->client;
- if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
+ if ((s->s_flags & NFS_SB_MASK) != (fc->sb_flags & NFS_SB_MASK))
goto Ebusy;
if (a->nfs_client != b->nfs_client)
goto Ebusy;
@@ -2464,19 +1040,11 @@ Ebusy:
return 0;
}
-struct nfs_sb_mountdata {
- struct nfs_server *server;
- int mntflags;
-};
-
-static int nfs_set_super(struct super_block *s, void *data)
+static int nfs_set_super(struct super_block *s, struct fs_context *fc)
{
- struct nfs_sb_mountdata *sb_mntdata = data;
- struct nfs_server *server = sb_mntdata->server;
+ struct nfs_server *server = fc->s_fs_info;
int ret;
- s->s_flags = sb_mntdata->mntflags;
- s->s_fs_info = server;
s->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
ret = set_anon_super(s, server);
if (ret == 0)
@@ -2541,11 +1109,9 @@ static int nfs_compare_userns(const struct nfs_server *old,
return 1;
}
-static int nfs_compare_super(struct super_block *sb, void *data)
+static int nfs_compare_super(struct super_block *sb, struct fs_context *fc)
{
- struct nfs_sb_mountdata *sb_mntdata = data;
- struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
- int mntflags = sb_mntdata->mntflags;
+ struct nfs_server *server = fc->s_fs_info, *old = NFS_SB(sb);
if (!nfs_compare_super_address(old, server))
return 0;
@@ -2556,13 +1122,12 @@ static int nfs_compare_super(struct super_block *sb, void *data)
return 0;
if (!nfs_compare_userns(old, server))
return 0;
- return nfs_compare_mount_options(sb, server, mntflags);
+ return nfs_compare_mount_options(sb, server, fc);
}
#ifdef CONFIG_NFS_FSCACHE
static void nfs_get_cache_cookie(struct super_block *sb,
- struct nfs_parsed_mount_data *parsed,
- struct nfs_clone_mount *cloned)
+ struct nfs_fs_context *ctx)
{
struct nfs_server *nfss = NFS_SB(sb);
char *uniq = NULL;
@@ -2571,80 +1136,36 @@ static void nfs_get_cache_cookie(struct super_block *sb,
nfss->fscache_key = NULL;
nfss->fscache = NULL;
- if (parsed) {
- if (!(parsed->options & NFS_OPTION_FSCACHE))
- return;
- if (parsed->fscache_uniq) {
- uniq = parsed->fscache_uniq;
- ulen = strlen(parsed->fscache_uniq);
- }
- } else if (cloned) {
- struct nfs_server *mnt_s = NFS_SB(cloned->sb);
+ if (!ctx)
+ return;
+
+ if (ctx->clone_data.sb) {
+ struct nfs_server *mnt_s = NFS_SB(ctx->clone_data.sb);
if (!(mnt_s->options & NFS_OPTION_FSCACHE))
return;
if (mnt_s->fscache_key) {
uniq = mnt_s->fscache_key->key.uniquifier;
ulen = mnt_s->fscache_key->key.uniq_len;
}
- } else
+ } else {
+ if (!(ctx->options & NFS_OPTION_FSCACHE))
+ return;
+ if (ctx->fscache_uniq) {
+ uniq = ctx->fscache_uniq;
+ ulen = strlen(ctx->fscache_uniq);
+ }
return;
+ }
nfs_fscache_get_super_cookie(sb, uniq, ulen);
}
#else
static void nfs_get_cache_cookie(struct super_block *sb,
- struct nfs_parsed_mount_data *parsed,
- struct nfs_clone_mount *cloned)
+ struct nfs_fs_context *ctx)
{
}
#endif
-int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
- struct nfs_mount_info *mount_info)
-{
- int error;
- unsigned long kflags = 0, kflags_out = 0;
- if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
- kflags |= SECURITY_LSM_NATIVE_LABELS;
-
- error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts,
- kflags, &kflags_out);
- if (error)
- goto err;
-
- if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
- !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
- NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
-err:
- return error;
-}
-EXPORT_SYMBOL_GPL(nfs_set_sb_security);
-
-int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
- struct nfs_mount_info *mount_info)
-{
- int error;
- unsigned long kflags = 0, kflags_out = 0;
-
- /* clone any lsm security options from the parent to the new sb */
- if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
- return -ESTALE;
-
- if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
- kflags |= SECURITY_LSM_NATIVE_LABELS;
-
- error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags,
- &kflags_out);
- if (error)
- return error;
-
- if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
- !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
- NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
- return 0;
-}
-EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
-
static void nfs_set_readahead(struct backing_dev_info *bdi,
unsigned long iomax_pages)
{
@@ -2652,35 +1173,40 @@ static void nfs_set_readahead(struct backing_dev_info *bdi,
bdi->io_pages = iomax_pages;
}
-struct dentry *nfs_fs_mount_common(struct nfs_server *server,
- int flags, const char *dev_name,
- struct nfs_mount_info *mount_info,
- struct nfs_subversion *nfs_mod)
+int nfs_get_tree_common(struct fs_context *fc)
{
+ struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct super_block *s;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
- int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
- struct nfs_sb_mountdata sb_mntdata = {
- .mntflags = flags,
- .server = server,
- };
+ int (*compare_super)(struct super_block *, struct fs_context *) = nfs_compare_super;
+ struct nfs_server *server = ctx->server;
+ unsigned long kflags = 0, kflags_out = 0;
int error;
+ ctx->server = NULL;
+ if (IS_ERR(server))
+ return PTR_ERR(server);
+
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
/* -o noac implies -o sync */
if (server->flags & NFS_MOUNT_NOAC)
- sb_mntdata.mntflags |= SB_SYNCHRONOUS;
+ fc->sb_flags |= SB_SYNCHRONOUS;
- if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
- if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS)
- sb_mntdata.mntflags |= SB_SYNCHRONOUS;
+ if (ctx->clone_data.sb)
+ if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS)
+ fc->sb_flags |= SB_SYNCHRONOUS;
+
+ if (server->caps & NFS_CAP_SECURITY_LABEL)
+ fc->lsm_flags |= SECURITY_LSM_NATIVE_LABELS;
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
+ fc->s_fs_info = server;
+ s = sget_fc(fc, compare_super, nfs_set_super);
+ fc->s_fs_info = NULL;
if (IS_ERR(s)) {
- mntroot = ERR_CAST(s);
+ error = PTR_ERR(s);
+ nfs_errorf(fc, "NFS: Couldn't get superblock");
goto out_err_nosb;
}
@@ -2690,88 +1216,66 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
} else {
error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev),
MINOR(server->s_dev));
- if (error) {
- mntroot = ERR_PTR(error);
+ if (error)
goto error_splat_super;
- }
nfs_set_readahead(s->s_bdi, server->rpages);
server->super = s;
}
if (!s->s_root) {
+ unsigned bsize = ctx->clone_data.inherited_bsize;
/* initial superblock/root creation */
- mount_info->fill_super(s, mount_info);
- nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned);
- if (!(server->flags & NFS_MOUNT_UNSHARED))
- s->s_iflags |= SB_I_MULTIROOT;
+ nfs_fill_super(s, ctx);
+ if (bsize) {
+ s->s_blocksize_bits = bsize;
+ s->s_blocksize = 1U << bsize;
+ }
+ nfs_get_cache_cookie(s, ctx);
}
- mntroot = nfs_get_root(s, mount_info->mntfh, dev_name);
- if (IS_ERR(mntroot))
+ error = nfs_get_root(s, fc);
+ if (error < 0) {
+ nfs_errorf(fc, "NFS: Couldn't get root dentry");
goto error_splat_super;
+ }
- error = mount_info->set_security(s, mntroot, mount_info);
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL)
+ kflags |= SECURITY_LSM_NATIVE_LABELS;
+ if (ctx->clone_data.sb) {
+ if (d_inode(fc->root)->i_fop != &nfs_dir_operations) {
+ error = -ESTALE;
+ goto error_splat_root;
+ }
+ /* clone any lsm security options from the parent to the new sb */
+ error = security_sb_clone_mnt_opts(ctx->clone_data.sb, s, kflags,
+ &kflags_out);
+ } else {
+ error = security_sb_set_mnt_opts(s, fc->security,
+ kflags, &kflags_out);
+ }
if (error)
goto error_splat_root;
+ if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL &&
+ !(kflags_out & SECURITY_LSM_NATIVE_LABELS))
+ NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL;
s->s_flags |= SB_ACTIVE;
+ error = 0;
out:
- return mntroot;
+ return error;
out_err_nosb:
nfs_free_server(server);
goto out;
error_splat_root:
- dput(mntroot);
- mntroot = ERR_PTR(error);
+ dput(fc->root);
+ fc->root = NULL;
error_splat_super:
deactivate_locked_super(s);
goto out;
}
-EXPORT_SYMBOL_GPL(nfs_fs_mount_common);
-
-struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data)
-{
- struct nfs_mount_info mount_info = {
- .fill_super = nfs_fill_super,
- .set_security = nfs_set_sb_security,
- };
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
- struct nfs_subversion *nfs_mod;
- int error;
-
- mount_info.parsed = nfs_alloc_parsed_mount_data();
- mount_info.mntfh = nfs_alloc_fhandle();
- if (mount_info.parsed == NULL || mount_info.mntfh == NULL)
- goto out;
-
- /* Validate the mount data */
- error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name);
- if (error == NFS_TEXT_DATA)
- error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name);
- if (error < 0) {
- mntroot = ERR_PTR(error);
- goto out;
- }
-
- nfs_mod = get_nfs_version(mount_info.parsed->version);
- if (IS_ERR(nfs_mod)) {
- mntroot = ERR_CAST(nfs_mod);
- goto out;
- }
-
- mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod);
-
- put_nfs_version(nfs_mod);
-out:
- nfs_free_parsed_mount_data(mount_info.parsed);
- nfs_free_fhandle(mount_info.mntfh);
- return mntroot;
-}
-EXPORT_SYMBOL_GPL(nfs_fs_mount);
/*
* Destroy an NFS2/3 superblock
@@ -2790,150 +1294,8 @@ void nfs_kill_super(struct super_block *s)
}
EXPORT_SYMBOL_GPL(nfs_kill_super);
-/*
- * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
- */
-static struct dentry *
-nfs_xdev_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
-{
- struct nfs_clone_mount *data = raw_data;
- struct nfs_mount_info mount_info = {
- .fill_super = nfs_clone_super,
- .set_security = nfs_clone_sb_security,
- .cloned = data,
- };
- struct nfs_server *server;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
- struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
-
- dprintk("--> nfs_xdev_mount()\n");
-
- mount_info.mntfh = mount_info.cloned->fh;
-
- /* create a new volume representation */
- server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
-
- if (IS_ERR(server))
- mntroot = ERR_CAST(server);
- else
- mntroot = nfs_fs_mount_common(server, flags,
- dev_name, &mount_info, nfs_mod);
-
- dprintk("<-- nfs_xdev_mount() = %ld\n",
- IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
- return mntroot;
-}
-
#if IS_ENABLED(CONFIG_NFS_V4)
-static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
-{
- args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
- NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
-}
-
-/*
- * Validate NFSv4 mount options
- */
-static int nfs4_validate_mount_data(void *options,
- struct nfs_parsed_mount_data *args,
- const char *dev_name)
-{
- struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
- struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
- char *c;
-
- if (data == NULL)
- goto out_no_data;
-
- args->version = 4;
-
- switch (data->version) {
- case 1:
- if (data->host_addrlen > sizeof(args->nfs_server.address))
- goto out_no_address;
- if (data->host_addrlen == 0)
- goto out_no_address;
- args->nfs_server.addrlen = data->host_addrlen;
- if (copy_from_user(sap, data->host_addr, data->host_addrlen))
- return -EFAULT;
- if (!nfs_verify_server_address(sap))
- goto out_no_address;
- args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
-
- if (data->auth_flavourlen) {
- rpc_authflavor_t pseudoflavor;
- if (data->auth_flavourlen > 1)
- goto out_inval_auth;
- if (copy_from_user(&pseudoflavor,
- data->auth_flavours,
- sizeof(pseudoflavor)))
- return -EFAULT;
- args->selected_flavor = pseudoflavor;
- } else
- args->selected_flavor = RPC_AUTH_UNIX;
-
- c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
- if (IS_ERR(c))
- return PTR_ERR(c);
- args->nfs_server.hostname = c;
-
- c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
- if (IS_ERR(c))
- return PTR_ERR(c);
- args->nfs_server.export_path = c;
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
-
- c = strndup_user(data->client_addr.data, 16);
- if (IS_ERR(c))
- return PTR_ERR(c);
- args->client_address = c;
-
- /*
- * Translate to nfs_parsed_mount_data, which nfs4_fill_super
- * can deal with.
- */
-
- args->flags = data->flags & NFS4_MOUNT_FLAGMASK;
- args->rsize = data->rsize;
- args->wsize = data->wsize;
- args->timeo = data->timeo;
- args->retrans = data->retrans;
- args->acregmin = data->acregmin;
- args->acregmax = data->acregmax;
- args->acdirmin = data->acdirmin;
- args->acdirmax = data->acdirmax;
- args->nfs_server.protocol = data->proto;
- nfs_validate_transport_protocol(args);
- if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
-
- break;
- default:
- return NFS_TEXT_DATA;
- }
-
- return 0;
-
-out_no_data:
- dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
- return -EINVAL;
-
-out_inval_auth:
- dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
- data->auth_flavourlen);
- return -EINVAL;
-
-out_no_address:
- dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
- return -EINVAL;
-
-out_invalid_transport_udp:
- dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
- return -EINVAL;
-}
-
/*
* NFS v4 module parameters need to stay in the
* NFS client for backwards compatibility
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 52cab65f91cf..c478b772cc49 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -243,13 +243,24 @@ out:
/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct address_space *mapping)
{
+ struct inode *inode = mapping->host;
+
nfs_zap_mapping(mapping->host, mapping);
+ /* Force file size revalidation */
+ spin_lock(&inode->i_lock);
+ NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED |
+ NFS_INO_REVAL_PAGECACHE |
+ NFS_INO_INVALID_SIZE;
+ spin_unlock(&inode->i_lock);
}
static void nfs_mapping_set_error(struct page *page, int error)
{
+ struct address_space *mapping = page_file_mapping(page);
+
SetPageError(page);
- mapping_set_error(page_file_mapping(page), error);
+ mapping_set_error(mapping, error);
+ nfs_set_pageerror(mapping);
}
/*
@@ -592,7 +603,7 @@ release_request:
static void nfs_write_error(struct nfs_page *req, int error)
{
- nfs_set_pageerror(page_file_mapping(req->wb_page));
+ trace_nfs_write_error(req, error);
nfs_mapping_set_error(req->wb_page, error);
nfs_inode_remove_request(req);
nfs_end_page_writeback(req);
@@ -998,7 +1009,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
nfs_list_remove_request(req);
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
(hdr->good_bytes < bytes)) {
- nfs_set_pageerror(page_file_mapping(req->wb_page));
+ trace_nfs_comp_error(req, hdr->error);
nfs_mapping_set_error(req->wb_page, hdr->error);
goto remove_req;
}
@@ -1403,8 +1414,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
task_setup_data->priority = priority;
rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
- trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
- hdr->args.stable);
+ trace_nfs_initiate_write(hdr);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1568,8 +1578,7 @@ static int nfs_writeback_done(struct rpc_task *task,
return status;
nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
- trace_nfs_writeback_done(inode, task->tk_status,
- hdr->args.offset, hdr->res.verf);
+ trace_nfs_writeback_done(task, hdr);
if (hdr->res.verf->committed < hdr->args.stable &&
task->tk_status >= 0) {
@@ -1649,6 +1658,8 @@ static void nfs_writeback_result(struct rpc_task *task,
*/
argp->stable = NFS_FILE_SYNC;
}
+ resp->count = 0;
+ resp->verf->committed = 0;
rpc_restart_call_prepare(task);
}
}
@@ -1824,11 +1835,12 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
/* Call the NFS version-specific code */
NFS_PROTO(data->inode)->commit_done(task, data);
- trace_nfs_commit_done(data);
+ trace_nfs_commit_done(task, data);
}
static void nfs_commit_release_pages(struct nfs_commit_data *data)
{
+ const struct nfs_writeverf *verf = data->res.verf;
struct nfs_page *req;
int status = data->task.tk_status;
struct nfs_commit_info cinfo;
@@ -1847,6 +1859,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
(long long)req_offset(req));
if (status < 0) {
if (req->wb_page) {
+ trace_nfs_commit_error(req, status);
nfs_mapping_set_error(req->wb_page, status);
nfs_inode_remove_request(req);
}
@@ -1856,7 +1869,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* Okay, COMMIT succeeded, apparently. Check the verifier
* returned by the server against all stored verfs. */
- if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
+ if (verf->committed > NFS_UNSTABLE &&
+ !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) {
/* We have a match */
if (req->wb_page)
nfs_inode_remove_request(req);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index f2f81561ebb6..f368f3215f88 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -134,6 +134,16 @@ config NFSD_FLEXFILELAYOUT
If unsure, say N.
+config NFSD_V4_2_INTER_SSC
+ bool "NFSv4.2 inter server to server COPY"
+ depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
+ help
+ This option enables support for NFSv4.2 inter server to
+ server copy where the destination server calls the NFSv4.2
+ client to read the data to copy from the source server.
+
+ If unsure, say N.
+
config NFSD_V4_SECURITY_LABEL
bool "Provide Security Label support for NFSv4 server"
depends on NFSD_V4 && SECURITY
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 32a9bf22ac08..22e77ede9f14 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -27,7 +27,6 @@
#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY (2 * HZ)
-#define NFSD_FILE_LRU_RESCAN (0)
#define NFSD_FILE_SHUTDOWN (1)
#define NFSD_FILE_LRU_THRESHOLD (4096UL)
#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2)
@@ -44,6 +43,17 @@ struct nfsd_fcache_bucket {
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+struct nfsd_fcache_disposal {
+ struct list_head list;
+ struct work_struct work;
+ struct net *net;
+ spinlock_t lock;
+ struct list_head freeme;
+ struct rcu_head rcu;
+};
+
+static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
+
static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab;
static struct nfsd_fcache_bucket *nfsd_file_hashtbl;
@@ -52,32 +62,21 @@ static long nfsd_file_lru_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
static atomic_long_t nfsd_filecache_count;
static struct delayed_work nfsd_filecache_laundrette;
+static DEFINE_SPINLOCK(laundrette_lock);
+static LIST_HEAD(laundrettes);
-enum nfsd_file_laundrette_ctl {
- NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,
- NFSD_FILE_LAUNDRETTE_MAY_FLUSH
-};
+static void nfsd_file_gc(void);
static void
-nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
+nfsd_file_schedule_laundrette(void)
{
long count = atomic_long_read(&nfsd_filecache_count);
if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
return;
- /* Be more aggressive about scanning if over the threshold */
- if (count > NFSD_FILE_LRU_THRESHOLD)
- mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
- else
- schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
-
- if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
- return;
-
- /* ...and don't delay flushing if we're out of control */
- if (count >= NFSD_FILE_LRU_LIMIT)
- flush_delayed_work(&nfsd_filecache_laundrette);
+ queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+ NFSD_LAUNDRETTE_DELAY);
}
static void
@@ -101,7 +100,7 @@ nfsd_file_mark_free(struct fsnotify_mark *mark)
static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
- if (!atomic_inc_not_zero(&nfm->nfm_ref))
+ if (!refcount_inc_not_zero(&nfm->nfm_ref))
return NULL;
return nfm;
}
@@ -109,8 +108,7 @@ nfsd_file_mark_get(struct nfsd_file_mark *nfm)
static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
- if (atomic_dec_and_test(&nfm->nfm_ref)) {
-
+ if (refcount_dec_and_test(&nfm->nfm_ref)) {
fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
fsnotify_put_mark(&nfm->nfm_mark);
}
@@ -133,9 +131,13 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
struct nfsd_file_mark,
nfm_mark));
mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
- fsnotify_put_mark(mark);
- if (likely(nfm))
+ if (nfm) {
+ fsnotify_put_mark(mark);
break;
+ }
+ /* Avoid soft lockup race with nfsd_file_mark_put() */
+ fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
+ fsnotify_put_mark(mark);
} else
mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
@@ -145,7 +147,7 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf)
return NULL;
fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
- atomic_set(&new->nfm_ref, 1);
+ refcount_set(&new->nfm_ref, 1);
err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
@@ -183,7 +185,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
nf->nf_flags = 0;
nf->nf_inode = inode;
nf->nf_hashval = hashval;
- atomic_set(&nf->nf_ref, 1);
+ refcount_set(&nf->nf_ref, 1);
nf->nf_may = may & NFSD_FILE_MAY_MASK;
if (may & NFSD_MAY_NOT_BREAK_LEASE) {
if (may & NFSD_MAY_WRITE)
@@ -192,6 +194,7 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
}
nf->nf_mark = NULL;
+ init_rwsem(&nf->nf_rwsem);
trace_nfsd_file_alloc(nf);
}
return nf;
@@ -238,13 +241,6 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}
-static bool
-nfsd_file_in_use(struct nfsd_file *nf)
-{
- return nfsd_file_check_writeback(nf) ||
- nfsd_file_check_write_error(nf);
-}
-
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
@@ -256,8 +252,6 @@ nfsd_file_do_unhash(struct nfsd_file *nf)
nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
hlist_del_rcu(&nf->nf_node);
- if (!list_empty(&nf->nf_lru))
- list_lru_del(&nfsd_file_lru, &nf->nf_lru);
atomic_long_dec(&nfsd_filecache_count);
}
@@ -266,6 +260,8 @@ nfsd_file_unhash(struct nfsd_file *nf)
{
if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
nfsd_file_do_unhash(nf);
+ if (!list_empty(&nf->nf_lru))
+ list_lru_del(&nfsd_file_lru, &nf->nf_lru);
return true;
}
return false;
@@ -283,42 +279,48 @@ nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *disp
if (!nfsd_file_unhash(nf))
return false;
/* keep final reference for nfsd_file_lru_dispose */
- if (atomic_add_unless(&nf->nf_ref, -1, 1))
+ if (refcount_dec_not_one(&nf->nf_ref))
return true;
list_add(&nf->nf_lru, dispose);
return true;
}
-static int
+static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
- int count;
trace_nfsd_file_put(nf);
- count = atomic_dec_return(&nf->nf_ref);
- if (!count) {
+ if (refcount_dec_and_test(&nf->nf_ref)) {
WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
nfsd_file_free(nf);
}
- return count;
}
void
nfsd_file_put(struct nfsd_file *nf)
{
- bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
- bool unused = !nfsd_file_in_use(nf);
+ bool is_hashed;
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
- if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
- nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
+ if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
+ nfsd_file_put_noref(nf);
+ return;
+ }
+
+ filemap_flush(nf->nf_file->f_mapping);
+ is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
+ nfsd_file_put_noref(nf);
+ if (is_hashed)
+ nfsd_file_schedule_laundrette();
+ if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
+ nfsd_file_gc();
}
struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
- if (likely(atomic_inc_not_zero(&nf->nf_ref)))
+ if (likely(refcount_inc_not_zero(&nf->nf_ref)))
return nf;
return NULL;
}
@@ -344,7 +346,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
while(!list_empty(dispose)) {
nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
list_del(&nf->nf_lru);
- if (!atomic_dec_and_test(&nf->nf_ref))
+ if (!refcount_dec_and_test(&nf->nf_ref))
continue;
if (nfsd_file_free(nf))
flush = true;
@@ -353,6 +355,58 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
flush_delayed_fput();
}
+static void
+nfsd_file_list_remove_disposal(struct list_head *dst,
+ struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&l->lock);
+ list_splice_init(&l->freeme, dst);
+ spin_unlock(&l->lock);
+}
+
+static void
+nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(l, &laundrettes, list) {
+ if (l->net == net) {
+ spin_lock(&l->lock);
+ list_splice_tail_init(files, &l->freeme);
+ spin_unlock(&l->lock);
+ queue_work(nfsd_filecache_wq, &l->work);
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+
+static void
+nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
+ struct net *net)
+{
+ struct nfsd_file *nf, *tmp;
+
+ list_for_each_entry_safe(nf, tmp, src, nf_lru) {
+ if (nf->nf_net == net)
+ list_move_tail(&nf->nf_lru, dst);
+ }
+}
+
+static void
+nfsd_file_dispose_list_delayed(struct list_head *dispose)
+{
+ LIST_HEAD(list);
+ struct nfsd_file *nf;
+
+ while(!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
+ nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
+ nfsd_file_list_add_disposal(&list, nf->nf_net);
+ }
+}
+
/*
* Note this can deadlock with nfsd_file_cache_purge.
*/
@@ -375,7 +429,7 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
* counter. Here we check the counter and then test and clear the flag.
* That order is deliberate to ensure that we can do this locklessly.
*/
- if (atomic_read(&nf->nf_ref) > 1)
+ if (refcount_read(&nf->nf_ref) > 1)
goto out_skip;
/*
@@ -386,31 +440,51 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
goto out_skip;
if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
- goto out_rescan;
+ goto out_skip;
if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
goto out_skip;
list_lru_isolate_move(lru, &nf->nf_lru, head);
return LRU_REMOVED;
-out_rescan:
- set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
out_skip:
return LRU_SKIP;
}
-static void
-nfsd_file_lru_dispose(struct list_head *head)
+static unsigned long
+nfsd_file_lru_walk_list(struct shrink_control *sc)
{
- while(!list_empty(head)) {
- struct nfsd_file *nf = list_first_entry(head,
- struct nfsd_file, nf_lru);
- list_del_init(&nf->nf_lru);
+ LIST_HEAD(head);
+ struct nfsd_file *nf;
+ unsigned long ret;
+
+ if (sc)
+ ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+ nfsd_file_lru_cb, &head);
+ else
+ ret = list_lru_walk(&nfsd_file_lru,
+ nfsd_file_lru_cb,
+ &head, LONG_MAX);
+ list_for_each_entry(nf, &head, nf_lru) {
spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
nfsd_file_do_unhash(nf);
spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
- nfsd_file_put_noref(nf);
}
+ nfsd_file_dispose_list_delayed(&head);
+ return ret;
+}
+
+static void
+nfsd_file_gc(void)
+{
+ nfsd_file_lru_walk_list(NULL);
+}
+
+static void
+nfsd_file_gc_worker(struct work_struct *work)
+{
+ nfsd_file_gc();
+ nfsd_file_schedule_laundrette();
}
static unsigned long
@@ -422,12 +496,7 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
- LIST_HEAD(head);
- unsigned long ret;
-
- ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
- nfsd_file_lru_dispose(&head);
- return ret;
+ return nfsd_file_lru_walk_list(sc);
}
static struct shrinker nfsd_file_shrinker = {
@@ -489,7 +558,7 @@ nfsd_file_close_inode(struct inode *inode)
__nfsd_file_close_inode(inode, hashval, &dispose);
trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
- nfsd_file_dispose_list(&dispose);
+ nfsd_file_dispose_list_delayed(&dispose);
}
/**
@@ -505,16 +574,11 @@ static void
nfsd_file_delayed_close(struct work_struct *work)
{
LIST_HEAD(head);
+ struct nfsd_fcache_disposal *l = container_of(work,
+ struct nfsd_fcache_disposal, work);
- list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
-
- if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
- nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
-
- if (!list_empty(&head)) {
- nfsd_file_lru_dispose(&head);
- flush_delayed_fput();
- }
+ nfsd_file_list_remove_disposal(&head, l);
+ nfsd_file_dispose_list(&head);
}
static int
@@ -575,6 +639,10 @@ nfsd_file_cache_init(void)
if (nfsd_file_hashtbl)
return 0;
+ nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
+ if (!nfsd_filecache_wq)
+ goto out;
+
nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
if (!nfsd_file_hashtbl) {
@@ -628,7 +696,7 @@ nfsd_file_cache_init(void)
spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
}
- INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
+ INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
return ret;
out_notifier:
@@ -644,6 +712,8 @@ out_err:
nfsd_file_mark_slab = NULL;
kfree(nfsd_file_hashtbl);
nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
goto out;
}
@@ -682,6 +752,88 @@ nfsd_file_cache_purge(struct net *net)
}
}
+static struct nfsd_fcache_disposal *
+nfsd_alloc_fcache_disposal(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ l = kmalloc(sizeof(*l), GFP_KERNEL);
+ if (!l)
+ return NULL;
+ INIT_WORK(&l->work, nfsd_file_delayed_close);
+ l->net = net;
+ spin_lock_init(&l->lock);
+ INIT_LIST_HEAD(&l->freeme);
+ return l;
+}
+
+static void
+nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ rcu_assign_pointer(l->net, NULL);
+ cancel_work_sync(&l->work);
+ nfsd_file_dispose_list(&l->freeme);
+ kfree_rcu(l, rcu);
+}
+
+static void
+nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&laundrette_lock);
+ list_add_tail_rcu(&l->list, &laundrettes);
+ spin_unlock(&laundrette_lock);
+}
+
+static void
+nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ spin_lock(&laundrette_lock);
+ list_del_rcu(&l->list);
+ spin_unlock(&laundrette_lock);
+}
+
+static int
+nfsd_alloc_fcache_disposal_net(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ l = nfsd_alloc_fcache_disposal(net);
+ if (!l)
+ return -ENOMEM;
+ nfsd_add_fcache_disposal(l);
+ return 0;
+}
+
+static void
+nfsd_free_fcache_disposal_net(struct net *net)
+{
+ struct nfsd_fcache_disposal *l;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(l, &laundrettes, list) {
+ if (l->net != net)
+ continue;
+ nfsd_del_fcache_disposal(l);
+ rcu_read_unlock();
+ nfsd_free_fcache_disposal(l);
+ return;
+ }
+ rcu_read_unlock();
+}
+
+int
+nfsd_file_cache_start_net(struct net *net)
+{
+ return nfsd_alloc_fcache_disposal_net(net);
+}
+
+void
+nfsd_file_cache_shutdown_net(struct net *net)
+{
+ nfsd_file_cache_purge(net);
+ nfsd_free_fcache_disposal_net(net);
+}
+
void
nfsd_file_cache_shutdown(void)
{
@@ -706,6 +858,8 @@ nfsd_file_cache_shutdown(void)
nfsd_file_mark_slab = NULL;
kfree(nfsd_file_hashtbl);
nfsd_file_hashtbl = NULL;
+ destroy_workqueue(nfsd_filecache_wq);
+ nfsd_filecache_wq = NULL;
}
static bool
@@ -789,6 +943,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_file *nf, *new;
struct inode *inode;
unsigned int hashval;
+ bool retry = true;
/* FIXME: skip this if fh_dentry is already set? */
status = fh_verify(rqstp, fhp, S_IFREG,
@@ -824,6 +979,11 @@ wait_for_construction:
/* Did construction of this file fail? */
if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ if (!retry) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+ retry = false;
nfsd_file_put_noref(nf);
goto retry;
}
@@ -858,7 +1018,7 @@ out:
open_file:
nf = new;
/* Take reference for the hashtable */
- atomic_inc(&nf->nf_ref);
+ refcount_inc(&nf->nf_ref);
__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
list_lru_add(&nfsd_file_lru, &nf->nf_lru);
@@ -867,7 +1027,8 @@ open_file:
nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
nfsd_file_hashtbl[hashval].nfb_count);
spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
- atomic_long_inc(&nfsd_filecache_count);
+ if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
+ nfsd_file_gc();
nf->nf_mark = nfsd_file_mark_find_or_create(nf);
if (nf->nf_mark)
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 851d9abf54c2..7872df5a0fe3 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -19,7 +19,7 @@
*/
struct nfsd_file_mark {
struct fsnotify_mark nfm_mark;
- atomic_t nfm_ref;
+ refcount_t nfm_ref;
};
/*
@@ -43,14 +43,17 @@ struct nfsd_file {
unsigned long nf_flags;
struct inode *nf_inode;
unsigned int nf_hashval;
- atomic_t nf_ref;
+ refcount_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
+ struct rw_semaphore nf_rwsem;
};
int nfsd_file_cache_init(void);
void nfsd_file_cache_purge(struct net *);
void nfsd_file_cache_shutdown(void);
+int nfsd_file_cache_start_net(struct net *net);
+void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 9a4ef815fb8c..2baf32311e00 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -40,7 +40,7 @@ struct nfsd_net {
struct lock_manager nfsd4_manager;
bool grace_ended;
- time_t boot_time;
+ time64_t boot_time;
/* internal mount of the "nfsd" pseudofilesystem: */
struct vfsmount *nfsd_mnt;
@@ -92,8 +92,8 @@ struct nfsd_net {
bool in_grace;
const struct nfsd4_client_tracking_ops *client_tracking_ops;
- time_t nfsd4_lease;
- time_t nfsd4_grace;
+ time64_t nfsd4_lease;
+ time64_t nfsd4_grace;
bool somebody_reclaimed;
bool track_reclaim_completes;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index cea68d8411ac..288bc76b4574 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -203,7 +203,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
RETURN_STATUS(nfserr_io);
nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
rqstp->rq_vec, nvecs, &cnt,
- resp->committed);
+ resp->committed, resp->verf);
resp->count = cnt;
RETURN_STATUS(nfserr);
}
@@ -683,7 +683,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
RETURN_STATUS(nfserr_inval);
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count);
+ nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count,
+ resp->verf);
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 195ab7a0fc89..aae514d40b64 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -32,14 +32,14 @@ static u32 nfs3_ftypes[] = {
* XDR functions for basic NFS types
*/
static __be32 *
-encode_time3(__be32 *p, struct timespec *time)
+encode_time3(__be32 *p, struct timespec64 *time)
{
*p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
return p;
}
static __be32 *
-decode_time3(__be32 *p, struct timespec *time)
+decode_time3(__be32 *p, struct timespec64 *time)
{
time->tv_sec = ntohl(*p++);
time->tv_nsec = ntohl(*p++);
@@ -167,7 +167,6 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
struct kstat *stat)
{
struct user_namespace *userns = nfsd_user_namespace(rqstp);
- struct timespec ts;
*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
*p++ = htonl((u32) (stat->mode & S_IALLUGO));
*p++ = htonl((u32) stat->nlink);
@@ -183,12 +182,9 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
*p++ = htonl((u32) MINOR(stat->rdev));
p = encode_fsid(p, fhp);
p = xdr_encode_hyper(p, stat->ino);
- ts = timespec64_to_timespec(stat->atime);
- p = encode_time3(p, &ts);
- ts = timespec64_to_timespec(stat->mtime);
- p = encode_time3(p, &ts);
- ts = timespec64_to_timespec(stat->ctime);
- p = encode_time3(p, &ts);
+ p = encode_time3(p, &stat->atime);
+ p = encode_time3(p, &stat->mtime);
+ p = encode_time3(p, &stat->ctime);
return p;
}
@@ -277,8 +273,8 @@ void fill_pre_wcc(struct svc_fh *fhp)
stat.size = inode->i_size;
}
- fhp->fh_pre_mtime = timespec64_to_timespec(stat.mtime);
- fhp->fh_pre_ctime = timespec64_to_timespec(stat.ctime);
+ fhp->fh_pre_mtime = stat.mtime;
+ fhp->fh_pre_ctime = stat.ctime;
fhp->fh_pre_size = stat.size;
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
fhp->fh_pre_saved = true;
@@ -330,7 +326,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp));
if ((args->check_guard = ntohl(*p++)) != 0) {
- struct timespec time;
+ struct timespec64 time;
p = decode_time3(p, &time);
args->guardtime = time.tv_sec;
}
@@ -751,17 +747,13 @@ int
nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_writeres *resp = rqstp->rq_resp;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- __be32 verf[2];
p = encode_wcc_data(rqstp, p, &resp->fh);
if (resp->status == 0) {
*p++ = htonl(resp->count);
*p++ = htonl(resp->committed);
- /* unique identifier, y2038 overflow can be ignored */
- nfsd_copy_boot_verifier(verf, nn);
- *p++ = verf[0];
- *p++ = verf[1];
+ *p++ = resp->verf[0];
+ *p++ = resp->verf[1];
}
return xdr_ressize_check(rqstp, p);
}
@@ -1125,16 +1117,12 @@ int
nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_commitres *resp = rqstp->rq_resp;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- __be32 verf[2];
p = encode_wcc_data(rqstp, p, &resp->fh);
/* Write verifier */
if (resp->status == 0) {
- /* unique identifier, y2038 overflow can be ignored */
- nfsd_copy_boot_verifier(verf, nn);
- *p++ = verf[0];
- *p++ = verf[1];
+ *p++ = resp->verf[0];
+ *p++ = resp->verf[1];
}
return xdr_ressize_check(rqstp, p);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 24534db87e86..c3b11a715082 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -823,7 +823,16 @@ static const struct rpc_program cb_program = {
static int max_cb_time(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
+
+ /*
+ * nfsd4_lease is set to at most one hour in __nfsd4_write_time,
+ * so we can use 32-bit math on it. Warn if that assumption
+ * ever stops being true.
+ */
+ if (WARN_ON_ONCE(nn->nfsd4_lease > 3600))
+ return 360 * HZ;
+
+ return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ;
}
static struct workqueue_struct *callback_wq;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2681c70283ce..e12409eca7cc 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
/* Client gets 2 lease periods to return it */
cutoff = ktime_add_ns(task->tk_start,
- nn->nfsd4_lease * NSEC_PER_SEC * 2);
+ (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2);
if (ktime_before(now, cutoff)) {
rpc_delay(task, HZ/100); /* 10 mili-seconds */
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4798667af647..0e75f7fb5fec 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,6 +37,7 @@
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/kthread.h>
+#include <linux/sunrpc/addr.h>
#include "idmap.h"
#include "cache.h"
@@ -232,7 +233,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
if (!*resfh)
return nfserr_jukebox;
fh_init(*resfh, NFS4_FHSIZE);
- open->op_truncate = 0;
+ open->op_truncate = false;
if (open->op_create) {
/* FIXME: check session persistence and pnfs flags.
@@ -365,7 +366,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
return nfserr_inval;
- open->op_created = 0;
+ open->op_created = false;
/*
* RFC5661 18.51.3
* Before RECLAIM_COMPLETE done, server should deny new lock
@@ -503,12 +504,20 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_putfh *putfh = &u->putfh;
+ __be32 ret;
fh_put(&cstate->current_fh);
cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
putfh->pf_fhlen);
- return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+ ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ if (ret == nfserr_stale && putfh->no_verify) {
+ SET_FH_FLAG(&cstate->current_fh, NFSD4_FH_FOREIGN);
+ ret = 0;
+ }
+#endif
+ return ret;
}
static __be32
@@ -530,9 +539,9 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_restorefh;
fh_dup2(&cstate->current_fh, &cstate->save_fh);
- if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) {
+ if (HAS_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG)) {
memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
return nfs_ok;
}
@@ -542,9 +551,9 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
fh_dup2(&cstate->save_fh, &cstate->current_fh);
- if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
+ if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG)) {
memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG);
}
return nfs_ok;
}
@@ -581,9 +590,9 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_commit *commit = &u->commit;
- gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
- commit->co_count);
+ commit->co_count,
+ (__be32 *)commit->co_verf.data);
}
static __be32
@@ -776,7 +785,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&read->rd_stateid, RD_STATE,
- &read->rd_nf);
+ &read->rd_nf, NULL);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
@@ -948,7 +957,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&cstate->current_fh, &setattr->sa_stateid,
- WR_STATE, NULL);
+ WR_STATE, NULL, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
@@ -975,7 +984,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
- 0, (time_t)0);
+ 0, (time64_t)0);
out:
fh_drop_write(&cstate->current_fh);
return status;
@@ -999,22 +1008,22 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
trace_nfsd_write_start(rqstp, &cstate->current_fh,
write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
- stateid, WR_STATE, &nf);
+ stateid, WR_STATE, &nf, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
}
write->wr_how_written = write->wr_stable_how;
- gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
&write->wr_head, write->wr_buflen);
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
- status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf->nf_file,
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- write->wr_how_written);
+ write->wr_how_written,
+ (__be32 *)write->wr_verifier.data);
nfsd_file_put(nf);
write->wr_bytes_written = cnt;
@@ -1034,14 +1043,14 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_nofilehandle;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
- src_stateid, RD_STATE, src);
+ src_stateid, RD_STATE, src, NULL);
if (status) {
dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
goto out;
}
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
- dst_stateid, WR_STATE, dst);
+ dst_stateid, WR_STATE, dst, NULL);
if (status) {
dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
goto out_put_src;
@@ -1076,8 +1085,8 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- status = nfsd4_clone_file_range(src->nf_file, clone->cl_src_pos,
- dst->nf_file, clone->cl_dst_pos, clone->cl_count,
+ status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+ dst, clone->cl_dst_pos, clone->cl_count,
EX_ISSYNC(cstate->current_fh.fh_export));
nfsd_file_put(dst);
@@ -1135,6 +1144,207 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp)
while ((copy = nfsd4_get_copy(clp)) != NULL)
nfsd4_stop_copy(copy);
}
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+
+extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh,
+ nfs4_stateid *stateid);
+extern void nfs42_ssc_close(struct file *filep);
+
+extern void nfs_sb_deactive(struct super_block *sb);
+
+#define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys"
+
+/**
+ * Support one copy source server for now.
+ */
+static __be32
+nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+ struct vfsmount **mount)
+{
+ struct file_system_type *type;
+ struct vfsmount *ss_mnt;
+ struct nfs42_netaddr *naddr;
+ struct sockaddr_storage tmp_addr;
+ size_t tmp_addrlen, match_netid_len = 3;
+ char *startsep = "", *endsep = "", *match_netid = "tcp";
+ char *ipaddr, *dev_name, *raw_data;
+ int len, raw_len;
+ __be32 status = nfserr_inval;
+
+ naddr = &nss->u.nl4_addr;
+ tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
+ naddr->addr_len,
+ (struct sockaddr *)&tmp_addr,
+ sizeof(tmp_addr));
+ if (tmp_addrlen == 0)
+ goto out_err;
+
+ if (tmp_addr.ss_family == AF_INET6) {
+ startsep = "[";
+ endsep = "]";
+ match_netid = "tcp6";
+ match_netid_len = 4;
+ }
+
+ if (naddr->netid_len != match_netid_len ||
+ strncmp(naddr->netid, match_netid, naddr->netid_len))
+ goto out_err;
+
+ /* Construct the raw data for the vfs_kern_mount call */
+ len = RPC_MAX_ADDRBUFLEN + 1;
+ ipaddr = kzalloc(len, GFP_KERNEL);
+ if (!ipaddr)
+ goto out_err;
+
+ rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len);
+
+ /* 2 for ipv6 endsep and startsep. 3 for ":/" and trailing '/0'*/
+
+ raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr);
+ raw_data = kzalloc(raw_len, GFP_KERNEL);
+ if (!raw_data)
+ goto out_free_ipaddr;
+
+ snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr);
+
+ status = nfserr_nodev;
+ type = get_fs_type("nfs");
+ if (!type)
+ goto out_free_rawdata;
+
+ /* Set the server:<export> for the vfs_kern_mount call */
+ dev_name = kzalloc(len + 5, GFP_KERNEL);
+ if (!dev_name)
+ goto out_free_rawdata;
+ snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
+
+ /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
+ ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
+ module_put(type->owner);
+ if (IS_ERR(ss_mnt))
+ goto out_free_devname;
+
+ status = 0;
+ *mount = ss_mnt;
+
+out_free_devname:
+ kfree(dev_name);
+out_free_rawdata:
+ kfree(raw_data);
+out_free_ipaddr:
+ kfree(ipaddr);
+out_err:
+ return status;
+}
+
+static void
+nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
+{
+ nfs_sb_deactive(ss_mnt->mnt_sb);
+ mntput(ss_mnt);
+}
+
+/**
+ * nfsd4_setup_inter_ssc
+ *
+ * Verify COPY destination stateid.
+ * Connect to the source server with NFSv4.1.
+ * Create the source struct file for nfsd_copy_range.
+ * Called with COPY cstate:
+ * SAVED_FH: source filehandle
+ * CURRENT_FH: destination filehandle
+ */
+static __be32
+nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy, struct vfsmount **mount)
+{
+ struct svc_fh *s_fh = NULL;
+ stateid_t *s_stid = &copy->cp_src_stateid;
+ __be32 status = nfserr_inval;
+
+ /* Verify the destination stateid and set dst struct file*/
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &copy->cp_dst_stateid,
+ WR_STATE, &copy->nf_dst, NULL);
+ if (status)
+ goto out;
+
+ status = nfsd4_interssc_connect(&copy->cp_src, rqstp, mount);
+ if (status)
+ goto out;
+
+ s_fh = &cstate->save_fh;
+
+ copy->c_fh.size = s_fh->fh_handle.fh_size;
+ memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size);
+ copy->stateid.seqid = cpu_to_be32(s_stid->si_generation);
+ memcpy(copy->stateid.other, (void *)&s_stid->si_opaque,
+ sizeof(stateid_opaque_t));
+
+ status = 0;
+out:
+ return status;
+}
+
+static void
+nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
+ struct nfsd_file *dst)
+{
+ nfs42_ssc_close(src->nf_file);
+ nfsd_file_put(src);
+ nfsd_file_put(dst);
+ mntput(ss_mnt);
+}
+
+#else /* CONFIG_NFSD_V4_2_INTER_SSC */
+
+static __be32
+nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy,
+ struct vfsmount **mount)
+{
+ *mount = NULL;
+ return nfserr_inval;
+}
+
+static void
+nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
+ struct nfsd_file *dst)
+{
+}
+
+static void
+nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
+{
+}
+
+static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh,
+ nfs4_stateid *stateid)
+{
+ return NULL;
+}
+#endif /* CONFIG_NFSD_V4_2_INTER_SSC */
+
+static __be32
+nfsd4_setup_intra_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy)
+{
+ return nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
+ &copy->nf_src, &copy->cp_dst_stateid,
+ &copy->nf_dst);
+}
+
+static void
+nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst)
+{
+ nfsd_file_put(src);
+ nfsd_file_put(dst);
+}
static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
{
@@ -1200,12 +1410,16 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
status = nfs_ok;
}
- nfsd_file_put(copy->nf_src);
- nfsd_file_put(copy->nf_dst);
+ if (!copy->cp_intra) /* Inter server SSC */
+ nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src,
+ copy->nf_dst);
+ else
+ nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst);
+
return status;
}
-static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
{
dst->cp_src_pos = src->cp_src_pos;
dst->cp_dst_pos = src->cp_dst_pos;
@@ -1215,15 +1429,25 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
memcpy(&dst->fh, &src->fh, sizeof(src->fh));
dst->cp_clp = src->cp_clp;
dst->nf_dst = nfsd_file_get(src->nf_dst);
- dst->nf_src = nfsd_file_get(src->nf_src);
+ dst->cp_intra = src->cp_intra;
+ if (src->cp_intra) /* for inter, file_src doesn't exist yet */
+ dst->nf_src = nfsd_file_get(src->nf_src);
+
memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
+ memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server));
+ memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
+ memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
+ dst->ss_mnt = src->ss_mnt;
+
+ return 0;
}
static void cleanup_async_copy(struct nfsd4_copy *copy)
{
- nfs4_free_cp_state(copy);
+ nfs4_free_copy_state(copy);
nfsd_file_put(copy->nf_dst);
- nfsd_file_put(copy->nf_src);
+ if (copy->cp_intra)
+ nfsd_file_put(copy->nf_src);
spin_lock(&copy->cp_clp->async_lock);
list_del(&copy->copies);
spin_unlock(&copy->cp_clp->async_lock);
@@ -1235,7 +1459,24 @@ static int nfsd4_do_async_copy(void *data)
struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
struct nfsd4_copy *cb_copy;
+ if (!copy->cp_intra) { /* Inter server SSC */
+ copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL);
+ if (!copy->nf_src) {
+ copy->nfserr = nfserr_serverfault;
+ nfsd4_interssc_disconnect(copy->ss_mnt);
+ goto do_callback;
+ }
+ copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, &copy->c_fh,
+ &copy->stateid);
+ if (IS_ERR(copy->nf_src->nf_file)) {
+ copy->nfserr = nfserr_offload_denied;
+ nfsd4_interssc_disconnect(copy->ss_mnt);
+ goto do_callback;
+ }
+ }
+
copy->nfserr = nfsd4_do_copy(copy, 0);
+do_callback:
cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!cb_copy)
goto out;
@@ -1247,6 +1488,8 @@ static int nfsd4_do_async_copy(void *data)
&nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
nfsd4_run_cb(&cb_copy->cp_cb);
out:
+ if (!copy->cp_intra)
+ kfree(copy->nf_src);
cleanup_async_copy(copy);
return 0;
}
@@ -1259,11 +1502,20 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd4_copy *async_copy = NULL;
- status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
- &copy->nf_src, &copy->cp_dst_stateid,
- &copy->nf_dst);
- if (status)
- goto out;
+ if (!copy->cp_intra) { /* Inter server SSC */
+ if (!inter_copy_offload_enable || copy->cp_synchronous) {
+ status = nfserr_notsupp;
+ goto out;
+ }
+ status = nfsd4_setup_inter_ssc(rqstp, cstate, copy,
+ &copy->ss_mnt);
+ if (status)
+ return nfserr_offload_denied;
+ } else {
+ status = nfsd4_setup_intra_ssc(rqstp, cstate, copy);
+ if (status)
+ return status;
+ }
copy->cp_clp = cstate->clp;
memcpy(&copy->fh, &cstate->current_fh.fh_handle,
@@ -1274,15 +1526,15 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserrno(-ENOMEM);
async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!async_copy)
- goto out;
- if (!nfs4_init_cp_state(nn, copy)) {
- kfree(async_copy);
- goto out;
- }
+ goto out_err;
+ if (!nfs4_init_copy_state(nn, copy))
+ goto out_err;
refcount_set(&async_copy->refcount, 1);
memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid,
sizeof(copy->cp_stateid));
- dup_copy_fields(copy, async_copy);
+ status = dup_copy_fields(copy, async_copy);
+ if (status)
+ goto out_err;
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
async_copy, "%s", "copy thread");
if (IS_ERR(async_copy->copy_task))
@@ -1293,13 +1545,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
spin_unlock(&async_copy->cp_clp->async_lock);
wake_up_process(async_copy->copy_task);
status = nfs_ok;
- } else
+ } else {
status = nfsd4_do_copy(copy, 1);
+ }
out:
return status;
out_err:
if (async_copy)
cleanup_async_copy(async_copy);
+ status = nfserrno(-ENOMEM);
+ if (!copy->cp_intra)
+ nfsd4_interssc_disconnect(copy->ss_mnt);
goto out;
}
@@ -1310,7 +1566,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
spin_lock(&clp->async_lock);
list_for_each_entry(copy, &clp->async_copies, copies) {
- if (memcmp(&copy->cp_stateid, stateid, NFS4_STATEID_SIZE))
+ if (memcmp(&copy->cp_stateid.stid, stateid, NFS4_STATEID_SIZE))
continue;
refcount_inc(&copy->refcount);
spin_unlock(&clp->async_lock);
@@ -1326,16 +1582,61 @@ nfsd4_offload_cancel(struct svc_rqst *rqstp,
union nfsd4_op_u *u)
{
struct nfsd4_offload_status *os = &u->offload_status;
- __be32 status = 0;
struct nfsd4_copy *copy;
struct nfs4_client *clp = cstate->clp;
copy = find_async_copy(clp, &os->stateid);
- if (copy)
+ if (!copy) {
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ return manage_cpntf_state(nn, &os->stateid, clp, NULL);
+ } else
nfsd4_stop_copy(copy);
- else
- status = nfserr_bad_stateid;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_copy_notify *cn = &u->copy_notify;
+ __be32 status;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfs4_stid *stid;
+ struct nfs4_cpntf_state *cps;
+ struct nfs4_client *clp = cstate->clp;
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &cn->cpn_src_stateid, RD_STATE, NULL,
+ &stid);
+ if (status)
+ return status;
+
+ cn->cpn_sec = nn->nfsd4_lease;
+ cn->cpn_nsec = 0;
+
+ status = nfserrno(-ENOMEM);
+ cps = nfs4_alloc_init_cpntf_state(nn, stid);
+ if (!cps)
+ goto out;
+ memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t));
+ memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t));
+ memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t));
+
+ /* For now, only return one server address in cpn_src, the
+ * address used by the client to connect to this server.
+ */
+ cn->cpn_src.nl4_type = NL4_NETADDR;
+ status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
+ &cn->cpn_src.u.nl4_addr);
+ WARN_ON_ONCE(status);
+ if (status) {
+ nfs4_put_cpntf_state(nn, cps);
+ goto out;
+ }
+out:
+ nfs4_put_stid(stid);
return status;
}
@@ -1348,7 +1649,7 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
- WR_STATE, &nf);
+ WR_STATE, &nf, NULL);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
@@ -1407,7 +1708,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
- RD_STATE, &nf);
+ RD_STATE, &nf, NULL);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
@@ -1912,6 +2213,45 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
- rqstp->rq_auth_slack;
}
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+static void
+check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
+{
+ struct nfsd4_op *op, *current_op = NULL, *saved_op = NULL;
+ struct nfsd4_copy *copy;
+ struct nfsd4_putfh *putfh;
+ int i;
+
+ /* traverse all operation and if it's a COPY compound, mark the
+ * source filehandle to skip verification
+ */
+ for (i = 0; i < args->opcnt; i++) {
+ op = &args->ops[i];
+ if (op->opnum == OP_PUTFH)
+ current_op = op;
+ else if (op->opnum == OP_SAVEFH)
+ saved_op = current_op;
+ else if (op->opnum == OP_RESTOREFH)
+ current_op = saved_op;
+ else if (op->opnum == OP_COPY) {
+ copy = (struct nfsd4_copy *)&op->u;
+ if (!saved_op) {
+ op->status = nfserr_nofilehandle;
+ return;
+ }
+ putfh = (struct nfsd4_putfh *)&saved_op->u;
+ if (!copy->cp_intra)
+ putfh->no_verify = true;
+ }
+ }
+}
+#else
+static void
+check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
+{
+}
+#endif
+
/*
* COMPOUND call.
*/
@@ -1960,6 +2300,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
resp->opcnt = 1;
goto encode_op;
}
+ check_if_stalefh_allowed(args);
trace_nfsd_compound(rqstp, args->opcnt);
while (!status && resp->opcnt < args->opcnt) {
@@ -1975,13 +2316,14 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
op->status = nfsd4_open_omfg(rqstp, cstate, op);
goto encode_op;
}
-
- if (!current_fh->fh_dentry) {
+ if (!current_fh->fh_dentry &&
+ !HAS_FH_FLAG(current_fh, NFSD4_FH_FOREIGN)) {
if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
op->status = nfserr_nofilehandle;
goto encode_op;
}
- } else if (current_fh->fh_export->ex_fslocs.migrated &&
+ } else if (current_fh->fh_export &&
+ current_fh->fh_export->ex_fslocs.migrated &&
!(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
op->status = nfserr_moved;
goto encode_op;
@@ -2025,7 +2367,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (op->opdesc->op_flags & OP_CLEAR_STATEID)
clear_current_stateid(cstate);
- if (need_wrongsec_check(rqstp))
+ if (current_fh->fh_export &&
+ need_wrongsec_check(rqstp))
op->status = check_nfsd_access(current_fh->fh_export, rqstp);
}
encode_op:
@@ -2292,6 +2635,21 @@ static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
}
+static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp,
+ struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size +
+ 3 /* cnr_lease_time */ +
+ 1 /* We support one cnr_source_server */ +
+ 1 /* cnr_stateid seq */ +
+ op_encode_stateid_maxsz /* cnr_stateid */ +
+ 1 /* num cnr_source_server*/ +
+ 1 /* nl4_type */ +
+ 1 /* nl4 size */ +
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz */)
+ * sizeof(__be32);
+}
+
#ifdef CONFIG_NFSD_PNFS
static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
@@ -2716,6 +3074,12 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_OFFLOAD_CANCEL",
.op_rsize_bop = nfsd4_only_status_rsize,
},
+ [OP_COPY_NOTIFY] = {
+ .op_func = nfsd4_copy_notify,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_COPY_NOTIFY",
+ .op_rsize_bop = nfsd4_copy_notify_rsize,
+ },
};
/**
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2481e7662128..a8fb18609146 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1445,7 +1445,7 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
}
cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
- cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
+ cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time;
ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
if (!ret)
ret = cup->cu_u.cu_msg.cm_status;
@@ -1782,7 +1782,7 @@ nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
}
static char *
-nfsd4_cltrack_grace_start(time_t grace_start)
+nfsd4_cltrack_grace_start(time64_t grace_start)
{
int copied;
size_t len;
@@ -1795,7 +1795,7 @@ nfsd4_cltrack_grace_start(time_t grace_start)
if (!result)
return result;
- copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
+ copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld",
grace_start);
if (copied >= len) {
/* just return nothing if output was truncated */
@@ -2004,7 +2004,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
char *legacy;
char timestr[22]; /* FIXME: better way to determine max size? */
- sprintf(timestr, "%ld", nn->boot_time);
+ sprintf(timestr, "%lld", nn->boot_time);
legacy = nfsd4_cltrack_legacy_topdir();
nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
kfree(legacy);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 369e574c5092..65cfe9ab47be 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -80,6 +80,7 @@ static u64 current_sessionid = 1;
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
void nfsd4_end_grace(struct nfsd_net *nn);
+static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
/* Locking: */
@@ -170,7 +171,7 @@ renew_client_locked(struct nfs4_client *clp)
clp->cl_clientid.cl_boot,
clp->cl_clientid.cl_id);
list_move_tail(&clp->cl_lru, &nn->client_lru);
- clp->cl_time = get_seconds();
+ clp->cl_time = ktime_get_boottime_seconds();
}
static void put_client_renew_locked(struct nfs4_client *clp)
@@ -722,6 +723,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
/* Will be incremented before return to client: */
refcount_set(&stid->sc_count, 1);
spin_lock_init(&stid->sc_lock);
+ INIT_LIST_HEAD(&stid->sc_cp_list);
/*
* It shouldn't be a problem to reuse an opaque stateid value.
@@ -741,30 +743,76 @@ out_free:
/*
* Create a unique stateid_t to represent each COPY.
*/
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
+ unsigned char sc_type)
{
int new_id;
+ stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
+ stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+ stid->sc_type = sc_type;
+
idr_preload(GFP_KERNEL);
spin_lock(&nn->s2s_cp_lock);
- new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
+ new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
+ stid->stid.si_opaque.so_id = new_id;
spin_unlock(&nn->s2s_cp_lock);
idr_preload_end();
if (new_id < 0)
return 0;
- copy->cp_stateid.si_opaque.so_id = new_id;
- copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
- copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
return 1;
}
-void nfs4_free_cp_state(struct nfsd4_copy *copy)
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+{
+ return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID);
+}
+
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_stid *p_stid)
+{
+ struct nfs4_cpntf_state *cps;
+
+ cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
+ if (!cps)
+ return NULL;
+ cps->cpntf_time = ktime_get_boottime_seconds();
+ refcount_set(&cps->cp_stateid.sc_count, 1);
+ if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
+ goto out_free;
+ spin_lock(&nn->s2s_cp_lock);
+ list_add(&cps->cp_list, &p_stid->sc_cp_list);
+ spin_unlock(&nn->s2s_cp_lock);
+ return cps;
+out_free:
+ kfree(cps);
+ return NULL;
+}
+
+void nfs4_free_copy_state(struct nfsd4_copy *copy)
{
struct nfsd_net *nn;
+ WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID);
nn = net_generic(copy->cp_clp->net, nfsd_net_id);
spin_lock(&nn->s2s_cp_lock);
- idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
+ idr_remove(&nn->s2s_cp_stateids,
+ copy->cp_stateid.stid.si_opaque.so_id);
+ spin_unlock(&nn->s2s_cp_lock);
+}
+
+static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid)
+{
+ struct nfs4_cpntf_state *cps;
+ struct nfsd_net *nn;
+
+ nn = net_generic(net, nfsd_net_id);
+ spin_lock(&nn->s2s_cp_lock);
+ while (!list_empty(&stid->sc_cp_list)) {
+ cps = list_first_entry(&stid->sc_cp_list,
+ struct nfs4_cpntf_state, cp_list);
+ _free_cpntf_state_locked(nn, cps);
+ }
spin_unlock(&nn->s2s_cp_lock);
}
@@ -806,7 +854,7 @@ static void nfs4_free_deleg(struct nfs4_stid *stid)
static DEFINE_SPINLOCK(blocked_delegations_lock);
static struct bloom_pair {
int entries, old_entries;
- time_t swap_time;
+ time64_t swap_time;
int new; /* index into 'set' */
DECLARE_BITMAP(set[2], 256);
} blocked_delegations;
@@ -818,15 +866,15 @@ static int delegation_blocked(struct knfsd_fh *fh)
if (bd->entries == 0)
return 0;
- if (seconds_since_boot() - bd->swap_time > 30) {
+ if (ktime_get_seconds() - bd->swap_time > 30) {
spin_lock(&blocked_delegations_lock);
- if (seconds_since_boot() - bd->swap_time > 30) {
+ if (ktime_get_seconds() - bd->swap_time > 30) {
bd->entries -= bd->old_entries;
bd->old_entries = bd->entries;
memset(bd->set[bd->new], 0,
sizeof(bd->set[0]));
bd->new = 1-bd->new;
- bd->swap_time = seconds_since_boot();
+ bd->swap_time = ktime_get_seconds();
}
spin_unlock(&blocked_delegations_lock);
}
@@ -856,7 +904,7 @@ static void block_delegations(struct knfsd_fh *fh)
__set_bit((hash>>8)&255, bd->set[bd->new]);
__set_bit((hash>>16)&255, bd->set[bd->new]);
if (bd->entries == 0)
- bd->swap_time = seconds_since_boot();
+ bd->swap_time = ktime_get_seconds();
bd->entries += 1;
spin_unlock(&blocked_delegations_lock);
}
@@ -915,6 +963,7 @@ nfs4_put_stid(struct nfs4_stid *s)
return;
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ nfs4_free_cpntf_statelist(clp->net, s);
spin_unlock(&clp->cl_lock);
s->sc_free(s);
if (fp)
@@ -1862,7 +1911,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
*/
if (clid->cl_boot == (u32)nn->boot_time)
return 0;
- dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
+ dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n",
clid->cl_boot, clid->cl_id, nn->boot_time);
return 1;
}
@@ -2215,14 +2264,14 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
* This is opaque to client, so no need to byte-swap. Use
* __force to keep sparse happy
*/
- verf[0] = (__force __be32)get_seconds();
+ verf[0] = (__force __be32)(u32)ktime_get_real_seconds();
verf[1] = (__force __be32)nn->clverifier_counter++;
memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
}
static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
{
- clp->cl_clientid.cl_boot = nn->boot_time;
+ clp->cl_clientid.cl_boot = (u32)nn->boot_time;
clp->cl_clientid.cl_id = nn->clientid_counter++;
gen_confirm(clp, nn);
}
@@ -2292,7 +2341,7 @@ static int client_info_show(struct seq_file *m, void *v)
clp->cl_nii_domain.len);
seq_printf(m, "\nImplementation name: ");
seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
- seq_printf(m, "\nImplementation time: [%ld, %ld]\n",
+ seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
}
drop_client(clp);
@@ -2612,7 +2661,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
gen_clid(clp, nn);
kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
- clp->cl_time = get_seconds();
+ clp->cl_time = ktime_get_boottime_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
@@ -2946,8 +2995,7 @@ static __be32 copy_impl_id(struct nfs4_client *clp,
xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
if (!clp->cl_nii_name.data)
return nfserr_jukebox;
- clp->cl_nii_time.tv_sec = exid->nii_time.tv_sec;
- clp->cl_nii_time.tv_nsec = exid->nii_time.tv_nsec;
+ clp->cl_nii_time = exid->nii_time;
return 0;
}
@@ -3373,7 +3421,7 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
case NFS4_CDFC4_BACK_OR_BOTH:
*dir = NFS4_CDFC4_BOTH;
return nfs_ok;
- };
+ }
return nfserr_inval;
}
@@ -4283,7 +4331,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
last = oo->oo_last_closed_stid;
oo->oo_last_closed_stid = s;
list_move_tail(&oo->oo_close_lru, &nn->close_lru);
- oo->oo_time = get_seconds();
+ oo->oo_time = ktime_get_boottime_seconds();
spin_unlock(&nn->client_lock);
if (last)
nfs4_put_stid(&last->st_stid);
@@ -4378,7 +4426,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
*/
spin_lock(&state_lock);
if (dp->dl_time == 0) {
- dp->dl_time = get_seconds();
+ dp->dl_time = ktime_get_boottime_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
spin_unlock(&state_lock);
@@ -4490,7 +4538,8 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
static __be32 lookup_clientid(clientid_t *clid,
struct nfsd4_compound_state *cstate,
- struct nfsd_net *nn)
+ struct nfsd_net *nn,
+ bool sessions)
{
struct nfs4_client *found;
@@ -4511,7 +4560,7 @@ static __be32 lookup_clientid(clientid_t *clid,
*/
WARN_ON_ONCE(cstate->session);
spin_lock(&nn->client_lock);
- found = find_confirmed_client(clid, false, nn);
+ found = find_confirmed_client(clid, sessions, nn);
if (!found) {
spin_unlock(&nn->client_lock);
return nfserr_expired;
@@ -4544,7 +4593,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
if (open->op_file == NULL)
return nfserr_jukebox;
- status = lookup_clientid(clientid, cstate, nn);
+ status = lookup_clientid(clientid, cstate, nn, false);
if (status)
return status;
clp = cstate->clp;
@@ -4672,7 +4721,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
return 0;
if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
- return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+ return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0);
}
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
@@ -5133,7 +5182,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("process_renew(%08x/%08x): starting\n",
clid->cl_boot, clid->cl_id);
- status = lookup_clientid(clid, cstate, nn);
+ status = lookup_clientid(clid, cstate, nn, false);
if (status)
goto out;
clp = cstate->clp;
@@ -5184,9 +5233,8 @@ nfsd4_end_grace(struct nfsd_net *nn)
*/
static bool clients_still_reclaiming(struct nfsd_net *nn)
{
- unsigned long now = get_seconds();
- unsigned long double_grace_period_end = nn->boot_time +
- 2 * nn->nfsd4_lease;
+ time64_t double_grace_period_end = nn->boot_time +
+ 2 * nn->nfsd4_lease;
if (nn->track_reclaim_completes &&
atomic_read(&nn->nr_reclaim_complete) ==
@@ -5199,12 +5247,12 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
* If we've given them *two* lease times to reclaim, and they're
* still not done, give up:
*/
- if (time_after(now, double_grace_period_end))
+ if (ktime_get_boottime_seconds() > double_grace_period_end)
return false;
return true;
}
-static time_t
+static time64_t
nfs4_laundromat(struct nfsd_net *nn)
{
struct nfs4_client *clp;
@@ -5213,8 +5261,11 @@ nfs4_laundromat(struct nfsd_net *nn)
struct nfs4_ol_stateid *stp;
struct nfsd4_blocked_lock *nbl;
struct list_head *pos, *next, reaplist;
- time_t cutoff = get_seconds() - nn->nfsd4_lease;
- time_t t, new_timeo = nn->nfsd4_lease;
+ time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease;
+ time64_t t, new_timeo = nn->nfsd4_lease;
+ struct nfs4_cpntf_state *cps;
+ copy_stateid_t *cps_t;
+ int i;
dprintk("NFSD: laundromat service - starting\n");
@@ -5225,10 +5276,20 @@ nfs4_laundromat(struct nfsd_net *nn)
dprintk("NFSD: end of grace period\n");
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);
+
+ spin_lock(&nn->s2s_cp_lock);
+ idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
+ cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
+ if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
+ cps->cpntf_time > cutoff)
+ _free_cpntf_state_locked(nn, cps);
+ }
+ spin_unlock(&nn->s2s_cp_lock);
+
spin_lock(&nn->client_lock);
list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
- if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+ if (clp->cl_time > cutoff) {
t = clp->cl_time - cutoff;
new_timeo = min(new_timeo, t);
break;
@@ -5251,7 +5312,7 @@ nfs4_laundromat(struct nfsd_net *nn)
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
+ if (dp->dl_time > cutoff) {
t = dp->dl_time - cutoff;
new_timeo = min(new_timeo, t);
break;
@@ -5271,8 +5332,7 @@ nfs4_laundromat(struct nfsd_net *nn)
while (!list_empty(&nn->close_lru)) {
oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
oo_close_lru);
- if (time_after((unsigned long)oo->oo_time,
- (unsigned long)cutoff)) {
+ if (oo->oo_time > cutoff) {
t = oo->oo_time - cutoff;
new_timeo = min(new_timeo, t);
break;
@@ -5302,8 +5362,7 @@ nfs4_laundromat(struct nfsd_net *nn)
while (!list_empty(&nn->blocked_locks_lru)) {
nbl = list_first_entry(&nn->blocked_locks_lru,
struct nfsd4_blocked_lock, nbl_lru);
- if (time_after((unsigned long)nbl->nbl_time,
- (unsigned long)cutoff)) {
+ if (nbl->nbl_time > cutoff) {
t = nbl->nbl_time - cutoff;
new_timeo = min(new_timeo, t);
break;
@@ -5320,7 +5379,7 @@ nfs4_laundromat(struct nfsd_net *nn)
free_blocked_lock(nbl);
}
out:
- new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
+ new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
return new_timeo;
}
@@ -5330,13 +5389,13 @@ static void laundromat_main(struct work_struct *);
static void
laundromat_main(struct work_struct *laundry)
{
- time_t t;
+ time64_t t;
struct delayed_work *dwork = to_delayed_work(laundry);
struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
laundromat_work);
t = nfs4_laundromat(nn);
- dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
+ dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t);
queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
}
@@ -5521,7 +5580,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
return nfserr_bad_stateid;
- status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
+ status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn,
+ false);
if (status == nfserr_stale_clientid) {
if (cstate->session)
return nfserr_bad_stateid;
@@ -5600,6 +5660,85 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
out:
return status;
}
+static void
+_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
+{
+ WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID);
+ if (!refcount_dec_and_test(&cps->cp_stateid.sc_count))
+ return;
+ list_del(&cps->cp_list);
+ idr_remove(&nn->s2s_cp_stateids,
+ cps->cp_stateid.stid.si_opaque.so_id);
+ kfree(cps);
+}
+/*
+ * A READ from an inter server to server COPY will have a
+ * copy stateid. Look up the copy notify stateid from the
+ * idr structure and take a reference on it.
+ */
+__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_client *clp,
+ struct nfs4_cpntf_state **cps)
+{
+ copy_stateid_t *cps_t;
+ struct nfs4_cpntf_state *state = NULL;
+
+ if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id)
+ return nfserr_bad_stateid;
+ spin_lock(&nn->s2s_cp_lock);
+ cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id);
+ if (cps_t) {
+ state = container_of(cps_t, struct nfs4_cpntf_state,
+ cp_stateid);
+ if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) {
+ state = NULL;
+ goto unlock;
+ }
+ if (!clp)
+ refcount_inc(&state->cp_stateid.sc_count);
+ else
+ _free_cpntf_state_locked(nn, state);
+ }
+unlock:
+ spin_unlock(&nn->s2s_cp_lock);
+ if (!state)
+ return nfserr_bad_stateid;
+ if (!clp && state)
+ *cps = state;
+ return 0;
+}
+
+static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_stid **stid)
+{
+ __be32 status;
+ struct nfs4_cpntf_state *cps = NULL;
+ struct nfsd4_compound_state cstate;
+
+ status = manage_cpntf_state(nn, st, NULL, &cps);
+ if (status)
+ return status;
+
+ cps->cpntf_time = ktime_get_boottime_seconds();
+ memset(&cstate, 0, sizeof(cstate));
+ status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true);
+ if (status)
+ goto out;
+ status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid,
+ NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+ stid, nn);
+ put_client_renew(cstate.clp);
+out:
+ nfs4_put_cpntf_state(nn, cps);
+ return status;
+}
+
+void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
+{
+ spin_lock(&nn->s2s_cp_lock);
+ _free_cpntf_state_locked(nn, cps);
+ spin_unlock(&nn->s2s_cp_lock);
+}
/*
* Checks for stateid operations
@@ -5607,7 +5746,8 @@ out:
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
- stateid_t *stateid, int flags, struct nfsd_file **nfp)
+ stateid_t *stateid, int flags, struct nfsd_file **nfp,
+ struct nfs4_stid **cstid)
{
struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
@@ -5629,6 +5769,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
status = nfsd4_lookup_stateid(cstate, stateid,
NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
&s, nn);
+ if (status == nfserr_bad_stateid)
+ status = find_cpntf_state(nn, stateid, &s);
if (status)
return status;
status = nfsd4_stid_check_stateid_generation(stateid, s,
@@ -5656,8 +5798,12 @@ done:
if (status == nfs_ok && nfp)
status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
out:
- if (s)
- nfs4_put_stid(s);
+ if (s) {
+ if (!status && cstid)
+ *cstid = s;
+ else
+ nfs4_put_stid(s);
+ }
return status;
}
@@ -6550,7 +6696,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
if (fl_flags & FL_SLEEP) {
- nbl->nbl_time = jiffies;
+ nbl->nbl_time = ktime_get_boottime_seconds();
spin_lock(&nn->blocked_locks_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
@@ -6657,7 +6803,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
if (!nfsd4_has_session(cstate)) {
- status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
+ status = lookup_clientid(&lockt->lt_clientid, cstate, nn,
+ false);
if (status)
goto out;
}
@@ -6841,7 +6988,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
- status = lookup_clientid(clid, cstate, nn);
+ status = lookup_clientid(clid, cstate, nn, false);
if (status)
return status;
@@ -6988,7 +7135,7 @@ nfs4_check_open_reclaim(clientid_t *clid,
__be32 status;
/* find clientid in conf_id_hashtbl */
- status = lookup_clientid(clid, cstate, nn);
+ status = lookup_clientid(clid, cstate, nn, false);
if (status)
return nfserr_reclaim_bad;
@@ -7641,7 +7788,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
nn->conf_name_tree = RB_ROOT;
nn->unconf_name_tree = RB_ROOT;
- nn->boot_time = get_seconds();
+ nn->boot_time = ktime_get_real_seconds();
nn->grace_ended = false;
nn->nfsd4_manager.block_opens = true;
INIT_LIST_HEAD(&nn->nfsd4_manager.list);
@@ -7710,7 +7857,7 @@ nfs4_state_start_net(struct net *net)
nfsd4_client_tracking_init(net);
if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
goto skip_grace;
- printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
+ printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n",
nn->nfsd4_grace, net->ns.inum);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;
@@ -7786,7 +7933,8 @@ nfs4_state_shutdown(void)
static void
get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
{
- if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
+ if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) &&
+ CURRENT_STATEID(stateid))
memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
}
@@ -7795,14 +7943,14 @@ put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
{
if (cstate->minorversion) {
memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
}
void
clear_current_stateid(struct nfsd4_compound_state *cstate)
{
- CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
/*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d2dc4c0e22e8..9761512674a0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,6 +40,7 @@
#include <linux/utsname.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/addr.h>
#include "idmap.h"
#include "acl.h"
@@ -1744,10 +1745,47 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
DECODE_TAIL;
}
+static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
+ struct nl4_server *ns)
+{
+ DECODE_HEAD;
+ struct nfs42_netaddr *naddr;
+
+ READ_BUF(4);
+ ns->nl4_type = be32_to_cpup(p++);
+
+ /* currently support for 1 inter-server source server */
+ switch (ns->nl4_type) {
+ case NL4_NETADDR:
+ naddr = &ns->u.nl4_addr;
+
+ READ_BUF(4);
+ naddr->netid_len = be32_to_cpup(p++);
+ if (naddr->netid_len > RPCBIND_MAXNETIDLEN)
+ goto xdr_error;
+
+ READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */
+ COPYMEM(naddr->netid, naddr->netid_len);
+
+ naddr->addr_len = be32_to_cpup(p++);
+ if (naddr->addr_len > RPCBIND_MAXUADDRLEN)
+ goto xdr_error;
+
+ READ_BUF(naddr->addr_len);
+ COPYMEM(naddr->addr, naddr->addr_len);
+ break;
+ default:
+ goto xdr_error;
+ }
+ DECODE_TAIL;
+}
+
static __be32
nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
{
DECODE_HEAD;
+ struct nl4_server *ns_dummy;
+ int i, count;
status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
if (status)
@@ -1762,7 +1800,32 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
p = xdr_decode_hyper(p, &copy->cp_count);
p++; /* ca_consecutive: we always do consecutive copies */
copy->cp_synchronous = be32_to_cpup(p++);
- /* tmp = be32_to_cpup(p); Source server list not supported */
+
+ count = be32_to_cpup(p++);
+
+ copy->cp_intra = false;
+ if (count == 0) { /* intra-server copy */
+ copy->cp_intra = true;
+ goto intra;
+ }
+
+ /* decode all the supplied server addresses but use first */
+ status = nfsd4_decode_nl4_server(argp, &copy->cp_src);
+ if (status)
+ return status;
+
+ ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL);
+ if (ns_dummy == NULL)
+ return nfserrno(-ENOMEM);
+ for (i = 0; i < count - 1; i++) {
+ status = nfsd4_decode_nl4_server(argp, ns_dummy);
+ if (status) {
+ kfree(ns_dummy);
+ return status;
+ }
+ }
+ kfree(ns_dummy);
+intra:
DECODE_TAIL;
}
@@ -1775,6 +1838,18 @@ nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
}
static __be32
+nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+ struct nfsd4_copy_notify *cn)
+{
+ int status;
+
+ status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
+ if (status)
+ return status;
+ return nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
+}
+
+static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
@@ -1875,7 +1950,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
/* new operations for NFSv4.2 */
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
- [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_copy_notify,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2024,11 +2099,11 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
*/
static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
{
- struct timespec ts;
+ struct timespec64 ts;
u32 ns;
ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran);
- ts = ns_to_timespec(ns);
+ ts = ns_to_timespec64(ns);
p = xdr_encode_hyper(p, ts.tv_sec);
*p++ = cpu_to_be32(ts.tv_nsec);
@@ -4244,6 +4319,46 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
}
static __be32
+nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ struct nfs42_netaddr *addr;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4);
+ *p++ = cpu_to_be32(ns->nl4_type);
+
+ switch (ns->nl4_type) {
+ case NL4_NETADDR:
+ addr = &ns->u.nl4_addr;
+
+ /* netid_len, netid, uaddr_len, uaddr (port included
+ * in RPCBIND_MAXUADDRLEN)
+ */
+ p = xdr_reserve_space(xdr,
+ 4 /* netid len */ +
+ (XDR_QUADLEN(addr->netid_len) * 4) +
+ 4 /* uaddr len */ +
+ (XDR_QUADLEN(addr->addr_len) * 4));
+ if (!p)
+ return nfserr_resource;
+
+ *p++ = cpu_to_be32(addr->netid_len);
+ p = xdr_encode_opaque_fixed(p, addr->netid,
+ addr->netid_len);
+ *p++ = cpu_to_be32(addr->addr_len);
+ p = xdr_encode_opaque_fixed(p, addr->addr,
+ addr->addr_len);
+ break;
+ default:
+ WARN_ON_ONCE(ns->nl4_type != NL4_NETADDR);
+ return nfserr_inval;
+ }
+
+ return 0;
+}
+
+static __be32
nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_copy *copy)
{
@@ -4277,6 +4392,40 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
}
static __be32
+nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_copy_notify *cn)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ __be32 *p;
+
+ if (nfserr)
+ return nfserr;
+
+ /* 8 sec, 4 nsec */
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
+ return nfserr_resource;
+
+ /* cnr_lease_time */
+ p = xdr_encode_hyper(p, cn->cpn_sec);
+ *p++ = cpu_to_be32(cn->cpn_nsec);
+
+ /* cnr_stateid */
+ nfserr = nfsd4_encode_stateid(xdr, &cn->cpn_cnr_stateid);
+ if (nfserr)
+ return nfserr;
+
+ /* cnr_src.nl_nsvr */
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
+
+ *p++ = cpu_to_be32(1);
+
+ return nfsd42_encode_nl4_server(resp, &cn->cpn_src);
+}
+
+static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_seek *seek)
{
@@ -4373,7 +4522,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
/* NFSv4.2 operations */
[OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
- [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_copy_notify,
[OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
@@ -4500,8 +4649,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
__be32 *p;
struct nfs4_replay *rp = op->replay;
- BUG_ON(!rp);
-
p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
if (!p) {
WARN_ON_ONCE(1);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7eb919f1b13f..e109a1007704 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -956,7 +956,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
#ifdef CONFIG_NFSD_V4
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
- time_t *time, struct nfsd_net *nn)
+ time64_t *time, struct nfsd_net *nn)
{
char *mesg = buf;
int rv, i;
@@ -984,11 +984,11 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
*time = i;
}
- return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
+ return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time);
}
static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
- time_t *time, struct nfsd_net *nn)
+ time64_t *time, struct nfsd_net *nn)
{
ssize_t rv;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 57b93d95fa5c..2ab5569126b8 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -19,6 +19,7 @@
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/addr.h>
#include <uapi/linux/nfsd/debug.h>
@@ -142,7 +143,6 @@ int nfs4_state_start(void);
int nfs4_state_start_net(struct net *net);
void nfs4_state_shutdown(void);
void nfs4_state_shutdown_net(struct net *net);
-void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void);
bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
@@ -153,7 +153,6 @@ static inline int nfs4_state_start(void) { return 0; }
static inline int nfs4_state_start_net(struct net *net) { return 0; }
static inline void nfs4_state_shutdown(void) { }
static inline void nfs4_state_shutdown_net(struct net *net) { }
-static inline void nfs4_reset_lease(time_t leasetime) { }
static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
static inline char * nfs4_recoverydir(void) {return NULL; }
static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
@@ -387,6 +386,37 @@ void nfsd_lockd_shutdown(void);
extern const u32 nfsd_suppattrs[3][3];
+static inline __be32 nfsd4_set_netaddr(struct sockaddr *addr,
+ struct nfs42_netaddr *netaddr)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+ unsigned int port;
+ size_t ret_addr, ret_port;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ port = ntohs(sin->sin_port);
+ sprintf(netaddr->netid, "tcp");
+ netaddr->netid_len = 3;
+ break;
+ case AF_INET6:
+ port = ntohs(sin6->sin6_port);
+ sprintf(netaddr->netid, "tcp6");
+ netaddr->netid_len = 4;
+ break;
+ default:
+ return nfserr_inval;
+ }
+ ret_addr = rpc_ntop(addr, netaddr->addr, sizeof(netaddr->addr));
+ ret_port = snprintf(netaddr->addr + ret_addr,
+ RPCBIND_MAXUADDRLEN + 1 - ret_addr,
+ ".%u.%u", port >> 8, port & 0xff);
+ WARN_ON(ret_port >= RPCBIND_MAXUADDRLEN + 1 - ret_addr);
+ netaddr->addr_len = ret_addr + ret_port;
+ return 0;
+}
+
static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
{
return !((bm1[0] & ~bm2[0]) ||
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 755e256a9103..56cfbc361561 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -35,15 +35,15 @@ typedef struct svc_fh {
bool fh_locked; /* inode locked by us */
bool fh_want_write; /* remount protection taken */
-
+ int fh_flags; /* FH flags */
#ifdef CONFIG_NFSD_V3
bool fh_post_saved; /* post-op attrs saved */
bool fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
- struct timespec fh_pre_mtime; /* mtime before oper */
- struct timespec fh_pre_ctime; /* ctime before oper */
+ struct timespec64 fh_pre_mtime; /* mtime before oper */
+ struct timespec64 fh_pre_ctime; /* ctime before oper */
/*
* pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
* to find out if it is valid.
@@ -56,6 +56,9 @@ typedef struct svc_fh {
#endif /* CONFIG_NFSD_V3 */
} svc_fh;
+#define NFSD4_FH_FOREIGN (1<<0)
+#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
+#define HAS_FH_FLAG(c, f) ((c)->fh_flags & (f))
enum nfsd_fsid {
FSID_DEV = 0,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index c83ddac22f38..543bbe0a556e 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -94,7 +94,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
* Solaris, at least, doesn't seem to care what the time
* request is. We require it be within 30 minutes of now.
*/
- time_t delta = iap->ia_atime.tv_sec - get_seconds();
+ time64_t delta = iap->ia_atime.tv_sec - ktime_get_real_seconds();
nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
if (nfserr)
@@ -113,7 +113,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
}
}
- nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
+ nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0);
done:
return nfsd_return_attrs(nfserr, resp);
}
@@ -226,7 +226,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
return nfserr_io;
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, rqstp->rq_vec, nvecs,
- &cnt, NFS_DATA_SYNC);
+ &cnt, NFS_DATA_SYNC, NULL);
return nfsd_return_attrs(nfserr, resp);
}
@@ -380,7 +380,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid)
- nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0);
+ nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time64_t)0);
}
out_unlock:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e8bee8ff30c5..3b77b904212d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -31,6 +31,12 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
+bool inter_copy_offload_enable;
+EXPORT_SYMBOL_GPL(inter_copy_offload_enable);
+module_param(inter_copy_offload_enable, bool, 0644);
+MODULE_PARM_DESC(inter_copy_offload_enable,
+ "Enable inter server to server copy offload. Default: false");
+
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -391,20 +397,25 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre
ret = lockd_up(net, cred);
if (ret)
goto out_socks;
- nn->lockd_up = 1;
+ nn->lockd_up = true;
}
- ret = nfs4_state_start_net(net);
+ ret = nfsd_file_cache_start_net(net);
if (ret)
goto out_lockd;
+ ret = nfs4_state_start_net(net);
+ if (ret)
+ goto out_filecache;
nn->nfsd_net_up = true;
return 0;
+out_filecache:
+ nfsd_file_cache_shutdown_net(net);
out_lockd:
if (nn->lockd_up) {
lockd_down(net);
- nn->lockd_up = 0;
+ nn->lockd_up = false;
}
out_socks:
nfsd_shutdown_generic();
@@ -415,11 +426,11 @@ static void nfsd_shutdown_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- nfsd_file_cache_purge(net);
+ nfsd_file_cache_shutdown_net(net);
nfs4_state_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
- nn->lockd_up = 0;
+ nn->lockd_up = false;
}
nn->nfsd_net_up = false;
nfsd_shutdown_generic();
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d61b83b9654c..68d3f30ee760 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -56,6 +56,14 @@ typedef struct {
stateid_opaque_t si_opaque;
} stateid_t;
+typedef struct {
+ stateid_t stid;
+#define NFS4_COPY_STID 1
+#define NFS4_COPYNOTIFY_STID 2
+ unsigned char sc_type;
+ refcount_t sc_count;
+} copy_stateid_t;
+
#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
#define STATEID_VAL(s) \
(s)->si_opaque.so_clid.cl_boot, \
@@ -96,6 +104,7 @@ struct nfs4_stid {
#define NFS4_REVOKED_DELEG_STID 16
#define NFS4_CLOSED_DELEG_STID 32
#define NFS4_LAYOUT_STID 64
+ struct list_head sc_cp_list;
unsigned char sc_type;
stateid_t sc_stateid;
spinlock_t sc_lock;
@@ -104,6 +113,17 @@ struct nfs4_stid {
void (*sc_free)(struct nfs4_stid *);
};
+/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the
+ * parent OPEN/LOCK/DELEG stateid.
+ */
+struct nfs4_cpntf_state {
+ copy_stateid_t cp_stateid;
+ struct list_head cp_list; /* per parent nfs4_stid */
+ stateid_t cp_p_stateid; /* copy of parent's stateid */
+ clientid_t cp_p_clid; /* copy of parent's clid */
+ time64_t cpntf_time; /* last time stateid used */
+};
+
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -132,7 +152,7 @@ struct nfs4_delegation {
struct list_head dl_recall_lru; /* delegation recalled */
struct nfs4_clnt_odstate *dl_clnt_odstate;
u32 dl_type;
- time_t dl_time;
+ time64_t dl_time;
/* For recall: */
int dl_retries;
struct nfsd4_callback dl_recall;
@@ -310,7 +330,7 @@ struct nfs4_client {
#endif
struct xdr_netobj cl_name; /* id generated by client */
nfs4_verifier cl_verifier; /* generated by client */
- time_t cl_time; /* time of last lease renewal */
+ time64_t cl_time; /* time of last lease renewal */
struct sockaddr_storage cl_addr; /* client ipaddress */
bool cl_mach_cred; /* SP4_MACH_CRED in force */
struct svc_cred cl_cred; /* setclientid principal */
@@ -320,7 +340,7 @@ struct nfs4_client {
/* NFSv4.1 client implementation id: */
struct xdr_netobj cl_nii_domain;
struct xdr_netobj cl_nii_name;
- struct timespec cl_nii_time;
+ struct timespec64 cl_nii_time;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
@@ -449,7 +469,7 @@ struct nfs4_openowner {
*/
struct list_head oo_close_lru;
struct nfs4_ol_stateid *oo_last_closed_stid;
- time_t oo_time; /* time of placement on so_close_lru */
+ time64_t oo_time; /* time of placement on so_close_lru */
#define NFS4_OO_CONFIRMED 1
unsigned char oo_flags;
};
@@ -606,7 +626,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
struct nfsd4_blocked_lock {
struct list_head nbl_list;
struct list_head nbl_lru;
- unsigned long nbl_time;
+ time64_t nbl_time;
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
@@ -618,14 +638,17 @@ struct nfsd4_copy;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
- stateid_t *stateid, int flags, struct nfsd_file **filp);
+ stateid_t *stateid, int flags, struct nfsd_file **filp,
+ struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
-void nfs4_free_cp_state(struct nfsd4_copy *copy);
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
+void nfs4_free_copy_state(struct nfsd4_copy *copy);
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_stid *p_stid);
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
@@ -655,6 +678,11 @@ void put_nfs4_file(struct nfs4_file *fi);
extern void nfs4_put_copy(struct nfsd4_copy *copy);
extern struct nfsd4_copy *
find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
+extern void nfs4_put_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_cpntf_state *cps);
+extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_client *clp,
+ struct nfs4_cpntf_state **cps);
static inline void get_nfs4_file(struct nfs4_file *fi)
{
refcount_inc(&fi->fi_ref);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index ffc78a0e28b2..06dd0d337049 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -166,6 +166,12 @@ DEFINE_STATEID_EVENT(layout_recall_done);
DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release);
+TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
+TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
+TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
+TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
+TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
+
#define show_nf_flags(val) \
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
@@ -195,7 +201,7 @@ DECLARE_EVENT_CLASS(nfsd_file_class,
TP_fast_assign(
__entry->nf_hashval = nf->nf_hashval;
__entry->nf_inode = nf->nf_inode;
- __entry->nf_ref = atomic_read(&nf->nf_ref);
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
__entry->nf_flags = nf->nf_flags;
__entry->nf_may = nf->nf_may;
__entry->nf_file = nf->nf_file;
@@ -228,7 +234,7 @@ TRACE_EVENT(nfsd_file_acquire,
TP_ARGS(rqstp, hash, inode, may_flags, nf, status),
TP_STRUCT__entry(
- __field(__be32, xid)
+ __field(u32, xid)
__field(unsigned int, hash)
__field(void *, inode)
__field(unsigned int, may_flags)
@@ -236,27 +242,27 @@ TRACE_EVENT(nfsd_file_acquire,
__field(unsigned long, nf_flags)
__field(unsigned char, nf_may)
__field(struct file *, nf_file)
- __field(__be32, status)
+ __field(u32, status)
),
TP_fast_assign(
- __entry->xid = rqstp->rq_xid;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
__entry->hash = hash;
__entry->inode = inode;
__entry->may_flags = may_flags;
- __entry->nf_ref = nf ? atomic_read(&nf->nf_ref) : 0;
+ __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0;
__entry->nf_flags = nf ? nf->nf_flags : 0;
__entry->nf_may = nf ? nf->nf_may : 0;
__entry->nf_file = nf ? nf->nf_file : NULL;
- __entry->status = status;
+ __entry->status = be32_to_cpu(status);
),
TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
- be32_to_cpu(__entry->xid), __entry->hash, __entry->inode,
+ __entry->xid, __entry->hash, __entry->inode,
show_nf_may(__entry->may_flags), __entry->nf_ref,
show_nf_flags(__entry->nf_flags),
show_nf_may(__entry->nf_may), __entry->nf_file,
- be32_to_cpu(__entry->status))
+ __entry->status)
);
DECLARE_EVENT_CLASS(nfsd_file_search_class,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c0dc491537a6..0aa02eb18bd3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -280,19 +280,25 @@ out:
* Commit metadata changes to stable storage.
*/
static int
-commit_metadata(struct svc_fh *fhp)
+commit_inode_metadata(struct inode *inode)
{
- struct inode *inode = d_inode(fhp->fh_dentry);
const struct export_operations *export_ops = inode->i_sb->s_export_op;
- if (!EX_ISSYNC(fhp->fh_export))
- return 0;
-
if (export_ops->commit_metadata)
return export_ops->commit_metadata(inode);
return sync_inode_metadata(inode, 1);
}
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+ struct inode *inode = d_inode(fhp->fh_dentry);
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+ return commit_inode_metadata(inode);
+}
+
/*
* Go over the attributes and take care of the small differences between
* NFS semantics and what Linux expects.
@@ -358,7 +364,7 @@ out_nfserrno:
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+ int check_guard, time64_t guardtime)
{
struct dentry *dentry;
struct inode *inode;
@@ -524,23 +530,39 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif
-__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
- u64 dst_pos, u64 count, bool sync)
+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync)
{
+ struct file *src = nf_src->nf_file;
+ struct file *dst = nf_dst->nf_file;
loff_t cloned;
+ __be32 ret = 0;
+ down_write(&nf_dst->nf_rwsem);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
- if (cloned < 0)
- return nfserrno(cloned);
- if (count && cloned != count)
- return nfserrno(-EINVAL);
+ if (cloned < 0) {
+ ret = nfserrno(cloned);
+ goto out_err;
+ }
+ if (count && cloned != count) {
+ ret = nfserrno(-EINVAL);
+ goto out_err;
+ }
if (sync) {
loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
- if (status < 0)
- return nfserrno(status);
+
+ if (!status)
+ status = commit_inode_metadata(file_inode(src));
+ if (status < 0) {
+ nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net,
+ nfsd_net_id));
+ ret = nfserrno(status);
+ }
}
- return 0;
+out_err:
+ up_write(&nf_dst->nf_rwsem);
+ return ret;
}
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
@@ -938,10 +960,12 @@ static int wait_for_concurrent_writes(struct file *file)
}
__be32
-nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int stable)
+ unsigned long *cnt, int stable,
+ __be32 *verf)
{
+ struct file *file = nf->nf_file;
struct svc_export *exp;
struct iov_iter iter;
__be32 nfserr;
@@ -972,9 +996,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
flags |= RWF_SYNC;
iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- if (host_err < 0)
+ if (flags & RWF_SYNC) {
+ down_write(&nf->nf_rwsem);
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ if (host_err < 0)
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ up_write(&nf->nf_rwsem);
+ } else {
+ down_read(&nf->nf_rwsem);
+ if (verf)
+ nfsd_copy_boot_verifier(verf,
+ net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
+ host_err = vfs_iter_write(file, &iter, &pos, flags);
+ up_read(&nf->nf_rwsem);
+ }
+ if (host_err < 0) {
+ nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp),
+ nfsd_net_id));
goto out_nfserr;
+ }
+ *cnt = host_err;
nfsdstats.io_write += *cnt;
fsnotify_modify(file);
@@ -1036,7 +1079,8 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt, int stable)
+ struct kvec *vec, int vlen, unsigned long *cnt, int stable,
+ __be32 *verf)
{
struct nfsd_file *nf;
__be32 err;
@@ -1047,8 +1091,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
if (err)
goto out;
- err = nfsd_vfs_write(rqstp, fhp, nf->nf_file, offset, vec,
- vlen, cnt, stable);
+ err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
+ vlen, cnt, stable, verf);
nfsd_file_put(nf);
out:
trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
@@ -1067,7 +1111,7 @@ out:
*/
__be32
nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count)
+ loff_t offset, unsigned long count, __be32 *verf)
{
struct nfsd_file *nf;
loff_t end = LLONG_MAX;
@@ -1086,10 +1130,14 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err)
goto out;
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+ int err2;
+ down_write(&nf->nf_rwsem);
+ err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
switch (err2) {
case 0:
+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+ nfsd_net_id));
break;
case -EINVAL:
err = nfserr_notsupp;
@@ -1099,7 +1147,10 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_reset_boot_verifier(net_generic(nf->nf_net,
nfsd_net_id));
}
- }
+ up_write(&nf->nf_rwsem);
+ } else
+ nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net,
+ nfsd_net_id));
nfsd_file_put(nf);
out:
@@ -1123,7 +1174,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+ return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0);
/* Callers expect file metadata to be committed here */
return nfserrno(commit_metadata(resfhp));
}
@@ -1386,7 +1437,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
&& d_inode(dchild)->i_atime.tv_sec == v_atime
&& d_inode(dchild)->i_size == 0 ) {
if (created)
- *created = 1;
+ *created = true;
break;
}
/* fall through */
@@ -1395,7 +1446,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
&& d_inode(dchild)->i_atime.tv_sec == v_atime
&& d_inode(dchild)->i_size == 0 ) {
if (created)
- *created = 1;
+ *created = true;
goto set_attr;
}
/* fall through */
@@ -1412,7 +1463,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
if (created)
- *created = 1;
+ *created = true;
nfsd_check_ignore_resizing(iap);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index cc110a10bfe8..3eb660ad80d1 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -34,6 +34,8 @@
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
+struct nfsd_file;
+
/*
* Callback function for readdir
*/
@@ -48,15 +50,16 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
- struct iattr *, int, time_t);
+ struct iattr *, int, time64_t);
int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
-__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
- u64, u64, bool);
+__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
@@ -71,7 +74,7 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
struct svc_fh *res, int createmode,
u32 *verifier, bool *truncp, bool *created);
__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long);
+ loff_t, unsigned long, __be32 *verf);
#endif /* CONFIG_NFSD_V3 */
int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
@@ -91,11 +94,12 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *,
u32 *eof);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
- struct kvec *, int, unsigned long *, int);
+ struct kvec *, int, unsigned long *,
+ int stable, __be32 *verf);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset,
+ struct nfsd_file *nf, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
- int stable);
+ int stable, __be32 *verf);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 99ff9f403ff1..4155fd71714c 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
struct svc_fh fh;
struct iattr attrs;
int check_guard;
- time_t guardtime;
+ time64_t guardtime;
};
struct nfsd3_diropargs {
@@ -159,6 +159,7 @@ struct nfsd3_writeres {
struct svc_fh fh;
unsigned long count;
int committed;
+ __be32 verf[2];
};
struct nfsd3_renameres {
@@ -223,6 +224,7 @@ struct nfsd3_pathconfres {
struct nfsd3_commitres {
__be32 status;
struct svc_fh fh;
+ __be32 verf[2];
};
struct nfsd3_getaclres {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index f4737d66ee98..db63d39b1507 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -46,9 +46,9 @@
#define CURRENT_STATE_ID_FLAG (1<<0)
#define SAVED_STATE_ID_FLAG (1<<1)
-#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f))
-#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f))
-#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f))
+#define SET_CSTATE_FLAG(c, f) ((c)->sid_flags |= (f))
+#define HAS_CSTATE_FLAG(c, f) ((c)->sid_flags & (f))
+#define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f))
struct nfsd4_compound_state {
struct svc_fh current_fh;
@@ -221,6 +221,7 @@ struct nfsd4_lookup {
struct nfsd4_putfh {
u32 pf_fhlen; /* request */
char *pf_fhval; /* request */
+ bool no_verify; /* represents foreigh fh */
};
struct nfsd4_open {
@@ -518,11 +519,13 @@ struct nfsd42_write_res {
struct nfsd4_copy {
/* request */
- stateid_t cp_src_stateid;
- stateid_t cp_dst_stateid;
- u64 cp_src_pos;
- u64 cp_dst_pos;
- u64 cp_count;
+ stateid_t cp_src_stateid;
+ stateid_t cp_dst_stateid;
+ u64 cp_src_pos;
+ u64 cp_dst_pos;
+ u64 cp_count;
+ struct nl4_server cp_src;
+ bool cp_intra;
/* both */
bool cp_synchronous;
@@ -540,13 +543,18 @@ struct nfsd4_copy {
struct nfsd_file *nf_src;
struct nfsd_file *nf_dst;
- stateid_t cp_stateid;
+ copy_stateid_t cp_stateid;
struct list_head copies;
struct task_struct *copy_task;
refcount_t refcount;
bool stopped;
+
+ struct vfsmount *ss_mnt;
+ struct nfs_fh c_fh;
+ nfs4_stateid stateid;
};
+extern bool inter_copy_offload_enable;
struct nfsd4_seek {
/* request */
@@ -568,6 +576,18 @@ struct nfsd4_offload_status {
u32 status;
};
+struct nfsd4_copy_notify {
+ /* request */
+ stateid_t cpn_src_stateid;
+ struct nl4_server cpn_dst;
+
+ /* response */
+ stateid_t cpn_cnr_stateid;
+ u64 cpn_sec;
+ u32 cpn_nsec;
+ struct nl4_server cpn_src;
+};
+
struct nfsd4_op {
int opnum;
const struct nfsd4_operation * opdesc;
@@ -627,6 +647,7 @@ struct nfsd4_op {
struct nfsd4_clone clone;
struct nfsd4_copy copy;
struct nfsd4_offload_status offload_status;
+ struct nfsd4_copy_notify copy_notify;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 25543a966c48..29eaa4544372 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -273,6 +273,7 @@ static void *help_start(struct seq_file *m, loff_t *pos)
static void *help_next(struct seq_file *m, void *v, loff_t *pos)
{
+ (*pos)++;
gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");
return NULL;
diff --git a/fs/pipe.c b/fs/pipe.c
index 57502c3c0fba..5a34d6c22d4c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -108,16 +108,19 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT(rdwait);
+ DEFINE_WAIT(wrwait);
/*
* Pipes are system-local resources, so sleeping on them
* is considered a noninteractive wait:
*/
- prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
pipe_unlock(pipe);
schedule();
- finish_wait(&pipe->wait, &wait);
+ finish_wait(&pipe->rd_wait, &rdwait);
+ finish_wait(&pipe->wr_wait, &wrwait);
pipe_lock(pipe);
}
@@ -286,7 +289,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
size_t total_len = iov_iter_count(to);
struct file *filp = iocb->ki_filp;
struct pipe_inode_info *pipe = filp->private_data;
- bool was_full;
+ bool was_full, wake_next_reader = false;
ssize_t ret;
/* Null read succeeds. */
@@ -344,10 +347,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (!buf->len) {
pipe_buf_release(pipe, buf);
- spin_lock_irq(&pipe->wait.lock);
+ spin_lock_irq(&pipe->rd_wait.lock);
tail++;
pipe->tail = tail;
- spin_unlock_irq(&pipe->wait.lock);
+ spin_unlock_irq(&pipe->rd_wait.lock);
}
total_len -= chars;
if (!total_len)
@@ -384,7 +387,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
* no data.
*/
if (unlikely(was_full)) {
- wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
+ wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
@@ -394,18 +397,23 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
* since we've done any required wakeups and there's no need
* to mark anything accessed. And we've dropped the lock.
*/
- if (wait_event_interruptible(pipe->wait, pipe_readable(pipe)) < 0)
+ if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
return -ERESTARTSYS;
__pipe_lock(pipe);
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+ wake_next_reader = true;
}
+ if (pipe_empty(pipe->head, pipe->tail))
+ wake_next_reader = false;
__pipe_unlock(pipe);
if (was_full) {
- wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
+ wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
+ if (wake_next_reader)
+ wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
if (ret > 0)
file_accessed(filp);
return ret;
@@ -437,6 +445,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
size_t total_len = iov_iter_count(from);
ssize_t chars;
bool was_empty = false;
+ bool wake_next_writer = false;
/* Null write succeeds. */
if (unlikely(total_len == 0))
@@ -515,16 +524,16 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* it, either the reader will consume it or it'll still
* be there for the next write.
*/
- spin_lock_irq(&pipe->wait.lock);
+ spin_lock_irq(&pipe->rd_wait.lock);
head = pipe->head;
if (pipe_full(head, pipe->tail, pipe->max_usage)) {
- spin_unlock_irq(&pipe->wait.lock);
+ spin_unlock_irq(&pipe->rd_wait.lock);
continue;
}
pipe->head = head + 1;
- spin_unlock_irq(&pipe->wait.lock);
+ spin_unlock_irq(&pipe->rd_wait.lock);
/* Insert it into the buffer array */
buf = &pipe->bufs[head & mask];
@@ -576,14 +585,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
*/
__pipe_unlock(pipe);
if (was_empty) {
- wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
+ wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
- wait_event_interruptible(pipe->wait, pipe_writable(pipe));
+ wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
__pipe_lock(pipe);
was_empty = pipe_empty(pipe->head, pipe->tail);
+ wake_next_writer = true;
}
out:
+ if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+ wake_next_writer = false;
__pipe_unlock(pipe);
/*
@@ -596,9 +608,11 @@ out:
* wake up pending jobs
*/
if (was_empty) {
- wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
+ wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
+ if (wake_next_writer)
+ wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
int err = file_update_time(filp);
if (err)
@@ -642,12 +656,15 @@ pipe_poll(struct file *filp, poll_table *wait)
unsigned int head, tail;
/*
- * Reading only -- no need for acquiring the semaphore.
+ * Reading pipe state only -- no need for acquiring the semaphore.
*
* But because this is racy, the code has to add the
* entry to the poll table _first_ ..
*/
- poll_wait(filp, &pipe->wait, wait);
+ if (filp->f_mode & FMODE_READ)
+ poll_wait(filp, &pipe->rd_wait, wait);
+ if (filp->f_mode & FMODE_WRITE)
+ poll_wait(filp, &pipe->wr_wait, wait);
/*
* .. and only then can you do the racy tests. That way,
@@ -706,7 +723,8 @@ pipe_release(struct inode *inode, struct file *file)
pipe->writers--;
if (pipe->readers || pipe->writers) {
- wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM | EPOLLERR | EPOLLHUP);
+ wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLHUP);
+ wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM | EPOLLERR | EPOLLHUP);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
@@ -789,7 +807,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
GFP_KERNEL_ACCOUNT);
if (pipe->bufs) {
- init_waitqueue_head(&pipe->wait);
+ init_waitqueue_head(&pipe->rd_wait);
+ init_waitqueue_head(&pipe->wr_wait);
pipe->r_counter = pipe->w_counter = 1;
pipe->max_usage = pipe_bufs;
pipe->ring_size = pipe_bufs;
@@ -1007,7 +1026,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
static void wake_up_partner(struct pipe_inode_info *pipe)
{
- wake_up_interruptible(&pipe->wait);
+ wake_up_interruptible(&pipe->rd_wait);
+ wake_up_interruptible(&pipe->wr_wait);
}
static int fifo_open(struct inode *inode, struct file *filp)
@@ -1118,13 +1138,13 @@ static int fifo_open(struct inode *inode, struct file *filp)
err_rd:
if (!--pipe->readers)
- wake_up_interruptible(&pipe->wait);
+ wake_up_interruptible(&pipe->wr_wait);
ret = -ERESTARTSYS;
goto err;
err_wr:
if (!--pipe->writers)
- wake_up_interruptible(&pipe->wait);
+ wake_up_interruptible(&pipe->rd_wait);
ret = -ERESTARTSYS;
goto err;
@@ -1251,7 +1271,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
pipe->max_usage = nr_slots;
pipe->tail = tail;
pipe->head = head;
- wake_up_interruptible_all(&pipe->wait);
+ wake_up_interruptible_all(&pipe->rd_wait);
+ wake_up_interruptible_all(&pipe->wr_wait);
return pipe->max_usage * PAGE_SIZE;
out_revert_acct:
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 72c07a34cff0..608233dfd29c 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -41,24 +41,19 @@ enum proc_param {
Opt_hidepid,
};
-static const struct fs_parameter_spec proc_param_specs[] = {
+static const struct fs_parameter_spec proc_fs_parameters[] = {
fsparam_u32("gid", Opt_gid),
fsparam_u32("hidepid", Opt_hidepid),
{}
};
-static const struct fs_parameter_description proc_fs_parameters = {
- .name = "proc",
- .specs = proc_param_specs,
-};
-
static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct proc_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
int opt;
- opt = fs_parse(fc, &proc_fs_parameters, param, &result);
+ opt = fs_parse(fc, proc_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -71,7 +66,7 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->hidepid = result.uint_32;
if (ctx->hidepid < HIDEPID_OFF ||
ctx->hidepid > HIDEPID_INVISIBLE)
- return invalf(fc, "proc: hidepid value must be between 0 and 2.\n");
+ return invalfc(fc, "hidepid value must be between 0 and 2.\n");
break;
default:
@@ -207,7 +202,7 @@ static void proc_kill_sb(struct super_block *sb)
static struct file_system_type proc_fs_type = {
.name = "proc",
.init_fs_context = proc_init_fs_context,
- .parameters = &proc_fs_parameters,
+ .parameters = proc_fs_parameters,
.kill_sb = proc_kill_sb,
.fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
};
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index d82636e8eb65..ee179a81b3da 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -181,23 +181,18 @@ enum ramfs_param {
Opt_mode,
};
-static const struct fs_parameter_spec ramfs_param_specs[] = {
+const struct fs_parameter_spec ramfs_fs_parameters[] = {
fsparam_u32oct("mode", Opt_mode),
{}
};
-const struct fs_parameter_description ramfs_fs_parameters = {
- .name = "ramfs",
- .specs = ramfs_param_specs,
-};
-
static int ramfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct fs_parse_result result;
struct ramfs_fs_info *fsi = fc->s_fs_info;
int opt;
- opt = fs_parse(fc, &ramfs_fs_parameters, param, &result);
+ opt = fs_parse(fc, ramfs_fs_parameters, param, &result);
if (opt < 0) {
/*
* We might like to report bad mount options here;
@@ -278,7 +273,7 @@ static void ramfs_kill_sb(struct super_block *sb)
static struct file_system_type ramfs_fs_type = {
.name = "ramfs",
.init_fs_context = ramfs_init_fs_context,
- .parameters = &ramfs_fs_parameters,
+ .parameters = ramfs_fs_parameters,
.kill_sb = ramfs_kill_sb,
.fs_flags = FS_USERNS_MOUNT,
};
diff --git a/fs/splice.c b/fs/splice.c
index 3009652a41c8..d671936d0aad 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -165,8 +165,8 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
{
smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
+ if (waitqueue_active(&pipe->rd_wait))
+ wake_up_interruptible(&pipe->rd_wait);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
@@ -462,8 +462,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
{
smp_mb();
- if (waitqueue_active(&pipe->wait))
- wake_up_interruptible(&pipe->wait);
+ if (waitqueue_active(&pipe->wr_wait))
+ wake_up_interruptible(&pipe->wr_wait);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile
index d46e9baee285..b88aecc86550 100644
--- a/fs/unicode/Makefile
+++ b/fs/unicode/Makefile
@@ -35,4 +35,4 @@ $(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE
endif
targets += utf8data.h
-hostprogs-y += mkutf8data
+hostprogs += mkutf8data
diff --git a/fs/vboxsf/Kconfig b/fs/vboxsf/Kconfig
new file mode 100644
index 000000000000..b84586ae08b3
--- /dev/null
+++ b/fs/vboxsf/Kconfig
@@ -0,0 +1,10 @@
+config VBOXSF_FS
+ tristate "VirtualBox guest shared folder (vboxsf) support"
+ depends on X86 && VBOXGUEST
+ select NLS
+ help
+ VirtualBox hosts can share folders with guests, this driver
+ implements the Linux-guest side of this allowing folders exported
+ by the host to be mounted under Linux.
+
+ If you want to use shared folders in VirtualBox guests, answer Y or M.
diff --git a/fs/vboxsf/Makefile b/fs/vboxsf/Makefile
new file mode 100644
index 000000000000..9e4328e79623
--- /dev/null
+++ b/fs/vboxsf/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: MIT
+
+obj-$(CONFIG_VBOXSF_FS) += vboxsf.o
+
+vboxsf-y := dir.o file.o utils.o vboxsf_wrappers.o super.o
diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
new file mode 100644
index 000000000000..dd147b490982
--- /dev/null
+++ b/fs/vboxsf/dir.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: MIT
+/*
+ * VirtualBox Guest Shared Folders support: Directory inode and file operations
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/namei.h>
+#include <linux/vbox_utils.h>
+#include "vfsmod.h"
+
+static int vboxsf_dir_open(struct inode *inode, struct file *file)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
+ struct shfl_createparms params = {};
+ struct vboxsf_dir_info *sf_d;
+ int err;
+
+ sf_d = vboxsf_dir_info_alloc();
+ if (!sf_d)
+ return -ENOMEM;
+
+ params.handle = SHFL_HANDLE_NIL;
+ params.create_flags = SHFL_CF_DIRECTORY | SHFL_CF_ACT_OPEN_IF_EXISTS |
+ SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ;
+
+ err = vboxsf_create_at_dentry(file_dentry(file), &params);
+ if (err)
+ goto err_free_dir_info;
+
+ if (params.result != SHFL_FILE_EXISTS) {
+ err = -ENOENT;
+ goto err_close;
+ }
+
+ err = vboxsf_dir_read_all(sbi, sf_d, params.handle);
+ if (err)
+ goto err_close;
+
+ vboxsf_close(sbi->root, params.handle);
+ file->private_data = sf_d;
+ return 0;
+
+err_close:
+ vboxsf_close(sbi->root, params.handle);
+err_free_dir_info:
+ vboxsf_dir_info_free(sf_d);
+ return err;
+}
+
+static int vboxsf_dir_release(struct inode *inode, struct file *file)
+{
+ if (file->private_data)
+ vboxsf_dir_info_free(file->private_data);
+
+ return 0;
+}
+
+static unsigned int vboxsf_get_d_type(u32 mode)
+{
+ unsigned int d_type;
+
+ switch (mode & SHFL_TYPE_MASK) {
+ case SHFL_TYPE_FIFO:
+ d_type = DT_FIFO;
+ break;
+ case SHFL_TYPE_DEV_CHAR:
+ d_type = DT_CHR;
+ break;
+ case SHFL_TYPE_DIRECTORY:
+ d_type = DT_DIR;
+ break;
+ case SHFL_TYPE_DEV_BLOCK:
+ d_type = DT_BLK;
+ break;
+ case SHFL_TYPE_FILE:
+ d_type = DT_REG;
+ break;
+ case SHFL_TYPE_SYMLINK:
+ d_type = DT_LNK;
+ break;
+ case SHFL_TYPE_SOCKET:
+ d_type = DT_SOCK;
+ break;
+ case SHFL_TYPE_WHITEOUT:
+ d_type = DT_WHT;
+ break;
+ default:
+ d_type = DT_UNKNOWN;
+ break;
+ }
+ return d_type;
+}
+
+static bool vboxsf_dir_emit(struct file *dir, struct dir_context *ctx)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(file_inode(dir)->i_sb);
+ struct vboxsf_dir_info *sf_d = dir->private_data;
+ struct shfl_dirinfo *info;
+ struct vboxsf_dir_buf *b;
+ unsigned int d_type;
+ loff_t i, cur = 0;
+ ino_t fake_ino;
+ void *end;
+ int err;
+
+ list_for_each_entry(b, &sf_d->info_list, head) {
+try_next_entry:
+ if (ctx->pos >= cur + b->entries) {
+ cur += b->entries;
+ continue;
+ }
+
+ /*
+ * Note the vboxsf_dir_info objects we are iterating over here
+ * are variable sized, so the info pointer may end up being
+ * unaligned. This is how we get the data from the host.
+ * Since vboxsf is only supported on x86 machines this is not
+ * a problem.
+ */
+ for (i = 0, info = b->buf; i < ctx->pos - cur; i++) {
+ end = &info->name.string.utf8[info->name.size];
+ /* Only happens if the host gives us corrupt data */
+ if (WARN_ON(end > (b->buf + b->used)))
+ return false;
+ info = end;
+ }
+
+ end = &info->name.string.utf8[info->name.size];
+ if (WARN_ON(end > (b->buf + b->used)))
+ return false;
+
+ /* Info now points to the right entry, emit it. */
+ d_type = vboxsf_get_d_type(info->info.attr.mode);
+
+ /*
+ * On 32 bit systems pos is 64 signed, while ino is 32 bit
+ * unsigned so fake_ino may overflow, check for this.
+ */
+ if ((ino_t)(ctx->pos + 1) != (u64)(ctx->pos + 1)) {
+ vbg_err("vboxsf: fake ino overflow, truncating dir\n");
+ return false;
+ }
+ fake_ino = ctx->pos + 1;
+
+ if (sbi->nls) {
+ char d_name[NAME_MAX];
+
+ err = vboxsf_nlscpy(sbi, d_name, NAME_MAX,
+ info->name.string.utf8,
+ info->name.length);
+ if (err) {
+ /* skip erroneous entry and proceed */
+ ctx->pos += 1;
+ goto try_next_entry;
+ }
+
+ return dir_emit(ctx, d_name, strlen(d_name),
+ fake_ino, d_type);
+ }
+
+ return dir_emit(ctx, info->name.string.utf8, info->name.length,
+ fake_ino, d_type);
+ }
+
+ return false;
+}
+
+static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx)
+{
+ bool emitted;
+
+ do {
+ emitted = vboxsf_dir_emit(dir, ctx);
+ if (emitted)
+ ctx->pos += 1;
+ } while (emitted);
+
+ return 0;
+}
+
+const struct file_operations vboxsf_dir_fops = {
+ .open = vboxsf_dir_open,
+ .iterate = vboxsf_dir_iterate,
+ .release = vboxsf_dir_release,
+ .read = generic_read_dir,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * This is called during name resolution/lookup to check if the @dentry in
+ * the cache is still valid. the job is handled by vboxsf_inode_revalidate.
+ */
+static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ if (d_really_is_positive(dentry))
+ return vboxsf_inode_revalidate(dentry) == 0;
+ else
+ return vboxsf_stat_dentry(dentry, NULL) == -ENOENT;
+}
+
+const struct dentry_operations vboxsf_dentry_ops = {
+ .d_revalidate = vboxsf_dentry_revalidate
+};
+
+/* iops */
+
+static struct dentry *vboxsf_dir_lookup(struct inode *parent,
+ struct dentry *dentry,
+ unsigned int flags)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+ struct shfl_fsobjinfo fsinfo;
+ struct inode *inode;
+ int err;
+
+ dentry->d_time = jiffies;
+
+ err = vboxsf_stat_dentry(dentry, &fsinfo);
+ if (err) {
+ inode = (err == -ENOENT) ? NULL : ERR_PTR(err);
+ } else {
+ inode = vboxsf_new_inode(parent->i_sb);
+ if (!IS_ERR(inode))
+ vboxsf_init_inode(sbi, inode, &fsinfo);
+ }
+
+ return d_splice_alias(inode, dentry);
+}
+
+static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry,
+ struct shfl_fsobjinfo *info)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+ struct vboxsf_inode *sf_i;
+ struct inode *inode;
+
+ inode = vboxsf_new_inode(parent->i_sb);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ sf_i = VBOXSF_I(inode);
+ /* The host may have given us different attr then requested */
+ sf_i->force_restat = 1;
+ vboxsf_init_inode(sbi, inode, info);
+
+ d_instantiate(dentry, inode);
+
+ return 0;
+}
+
+static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry,
+ umode_t mode, int is_dir)
+{
+ struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+ struct shfl_createparms params = {};
+ int err;
+
+ params.handle = SHFL_HANDLE_NIL;
+ params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW |
+ SHFL_CF_ACT_FAIL_IF_EXISTS |
+ SHFL_CF_ACCESS_READWRITE |
+ (is_dir ? SHFL_CF_DIRECTORY : 0);
+ params.info.attr.mode = (mode & 0777) |
+ (is_dir ? SHFL_TYPE_DIRECTORY : SHFL_TYPE_FILE);
+ params.info.attr.additional = SHFLFSOBJATTRADD_NOTHING;
+
+ err = vboxsf_create_at_dentry(dentry, &params);
+ if (err)
+ return err;
+
+ if (params.result != SHFL_FILE_CREATED)
+ return -EPERM;
+
+ vboxsf_close(sbi->root, params.handle);
+
+ err = vboxsf_dir_instantiate(parent, dentry, &params.info);
+ if (err)
+ return err;
+
+ /* parent directory access/change time changed */
+ sf_parent_i->force_restat = 1;
+
+ return 0;
+}
+
+static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry,
+ umode_t mode, bool excl)
+{
+ return vboxsf_dir_create(parent, dentry, mode, 0);
+}
+
+static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry,
+ umode_t mode)
+{
+ return vboxsf_dir_create(parent, dentry, mode, 1);
+}
+
+static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+ struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
+ struct inode *inode = d_inode(dentry);
+ struct shfl_string *path;
+ u32 flags;
+ int err;
+
+ if (S_ISDIR(inode->i_mode))
+ flags = SHFL_REMOVE_DIR;
+ else
+ flags = SHFL_REMOVE_FILE;
+
+ if (S_ISLNK(inode->i_mode))
+ flags |= SHFL_REMOVE_SYMLINK;
+
+ path = vboxsf_path_from_dentry(sbi, dentry);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ err = vboxsf_remove(sbi->root, path, flags);
+ __putname(path);
+ if (err)
+ return err;
+
+ /* parent directory access/change time changed */
+ sf_parent_i->force_restat = 1;
+
+ return 0;
+}
+
+static int vboxsf_dir_rename(struct inode *old_parent,
+ struct dentry *old_dentry,
+ struct inode *new_parent,
+ struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(old_parent->i_sb);
+ struct vboxsf_inode *sf_old_parent_i = VBOXSF_I(old_parent);
+ struct vboxsf_inode *sf_new_parent_i = VBOXSF_I(new_parent);
+ u32 shfl_flags = SHFL_RENAME_FILE | SHFL_RENAME_REPLACE_IF_EXISTS;
+ struct shfl_string *old_path, *new_path;
+ int err;
+
+ if (flags)
+ return -EINVAL;
+
+ old_path = vboxsf_path_from_dentry(sbi, old_dentry);
+ if (IS_ERR(old_path))
+ return PTR_ERR(old_path);
+
+ new_path = vboxsf_path_from_dentry(sbi, new_dentry);
+ if (IS_ERR(new_path)) {
+ err = PTR_ERR(new_path);
+ goto err_put_old_path;
+ }
+
+ if (d_inode(old_dentry)->i_mode & S_IFDIR)
+ shfl_flags = 0;
+
+ err = vboxsf_rename(sbi->root, old_path, new_path, shfl_flags);
+ if (err == 0) {
+ /* parent directories access/change time changed */
+ sf_new_parent_i->force_restat = 1;
+ sf_old_parent_i->force_restat = 1;
+ }
+
+ __putname(new_path);
+err_put_old_path:
+ __putname(old_path);
+ return err;
+}
+
+static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry,
+ const char *symname)
+{
+ struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb);
+ int symname_size = strlen(symname) + 1;
+ struct shfl_string *path, *ssymname;
+ struct shfl_fsobjinfo info;
+ int err;
+
+ path = vboxsf_path_from_dentry(sbi, dentry);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ssymname = kmalloc(SHFLSTRING_HEADER_SIZE + symname_size, GFP_KERNEL);
+ if (!ssymname) {
+ __putname(path);
+ return -ENOMEM;
+ }
+ ssymname->length = symname_size - 1;
+ ssymname->size = symname_size;
+ memcpy(ssymname->string.utf8, symname, symname_size);
+
+ err = vboxsf_symlink(sbi->root, path, ssymname, &info);
+ kfree(ssymname);
+ __putname(path);
+ if (err) {
+ /* -EROFS means symlinks are note support -> -EPERM */
+ return (err == -EROFS) ? -EPERM : err;
+ }
+
+ err = vboxsf_dir_instantiate(parent, dentry, &info);
+ if (err)
+ return err;
+
+ /* parent directory access/change time changed */
+ sf_parent_i->force_restat = 1;
+ return 0;
+}
+
+const struct inode_operations vboxsf_dir_iops = {
+ .lookup = vboxsf_dir_lookup,
+ .create = vboxsf_dir_mkfile,
+ .mkdir = vboxsf_dir_mkdir,
+ .rmdir = vboxsf_dir_unlink,
+ .unlink = vboxsf_dir_unlink,
+ .rename = vboxsf_dir_rename,
+ .symlink = vboxsf_dir_symlink,
+ .getattr = vboxsf_getattr,
+ .setattr = vboxsf_setattr,
+};
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
new file mode 100644
index 000000000000..c4ab5996d97a
--- /dev/null
+++ b/fs/vboxsf/file.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: MIT
+/*
+ * VirtualBox Guest Shared Folders support: Regular file inode and file ops.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/sizes.h>
+#include "vfsmod.h"
+
+struct vboxsf_handle {
+ u64 handle;
+ u32 root;
+ u32 access_flags;
+ struct kref refcount;
+ struct list_head head;
+};
+
+static int vboxsf_file_open(struct inode *inode, struct file *file)
+{
+ struct vboxsf_inode *sf_i = VBOXSF_I(inode);
+ struct shfl_createparms params = {};
+ struct vboxsf_handle *sf_handle;
+ u32 access_flags = 0;
+ int err;
+
+ sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL);
+ if (!sf_handle)
+ return -ENOMEM;
+
+ /*
+ * We check the value of params.handle afterwards to find out if
+ * the call succeeded or failed, as the API does not seem to cleanly
+ * distinguish error and informational messages.
+ *
+ * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to
+ * make the shared folders host service use our mode parameter.
+ */
+ params.handle = SHFL_HANDLE_NIL;
+ if (file->f_flags & O_CREAT) {
+ params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW;
+ /*
+ * We ignore O_EXCL, as the Linux kernel seems to call create
+ * beforehand itself, so O_EXCL should always fail.
+ */
+ if (file->f_flags & O_TRUNC)
+ params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
+ else
+ params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
+ } else {
+ params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW;
+ if (file->f_flags & O_TRUNC)
+ params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
+ }
+
+ switch (file->f_flags & O_ACCMODE) {
+ case O_RDONLY:
+ access_flags |= SHFL_CF_ACCESS_READ;
+ break;
+
+ case O_WRONLY:
+ access_flags |= SHFL_CF_ACCESS_WRITE;
+ break;
+
+ case O_RDWR:
+ access_flags |= SHFL_CF_ACCESS_READWRITE;
+ break;
+
+ default:
+ WARN_ON(1);
+ }
+
+ if (file->f_flags & O_APPEND)
+ access_flags |= SHFL_CF_ACCESS_APPEND;
+
+ params.create_flags |= access_flags;
+ params.info.attr.mode = inode->i_mode;
+
+ err = vboxsf_create_at_dentry(file_dentry(file), &params);
+ if (err == 0 && params.handle == SHFL_HANDLE_NIL)
+ err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT;
+ if (err) {
+ kfree(sf_handle);
+ return err;
+ }
+
+ /* the host may have given us different attr then requested */
+ sf_i->force_restat = 1;
+
+ /* init our handle struct and add it to the inode's handles list */
+ sf_handle->handle = params.handle;
+ sf_handle->root = VBOXSF_SBI(inode->i_sb)->root;
+ sf_handle->access_flags = access_flags;
+ kref_init(&sf_handle->refcount);
+
+ mutex_lock(&sf_i->handle_list_mutex);
+ list_add(&sf_handle->head, &sf_i->handle_list);
+ mutex_unlock(&sf_i->handle_list_mutex);
+
+ file->private_data = sf_handle;
+ return 0;
+}
+
+static void vboxsf_handle_release(struct kref *refcount)
+{
+ struct vboxsf_handle *sf_handle =
+ container_of(refcount, struct vboxsf_handle, refcount);
+
+ vboxsf_close(sf_handle->root, sf_handle->handle);
+ kfree(sf_handle);
+}
+
+static int vboxsf_file_release(struct inode *inode, struct file *file)
+{
+ struct vboxsf_inode *sf_i = VBOXSF_I(inode);
+ struct vboxsf_handle *sf_handle = file->private_data;
+
+ /*
+ * When a file is closed on our (the guest) side, we want any subsequent
+ * accesses done on the host side to see all changes done from our side.
+ */
+ filemap_write_and_wait(inode->i_mapping);
+
+ mutex_lock(&sf_i->handle_list_mutex);
+ list_del(&sf_handle->head);
+ mutex_unlock(&sf_i->handle_list_mutex);
+
+ kref_put(&sf_handle->refcount, vboxsf_handle_release);
+ return 0;
+}
+
+/*
+ * Write back dirty pages now, because there may not be any suitable
+ * open files later
+ */
+static void vboxsf_vma_close(struct vm_area_struct *vma)
+{
+ filemap_write_and_wait(vma->vm_file->f_mapping);
+}
+
+static const struct vm_operations_struct vboxsf_file_vm_ops = {
+ .close = vboxsf_vma_close,
+ .fault = filemap_fault,
+ .map_pages = filemap_map_pages,
+};
+
+static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int err;
+
+ err = generic_file_mmap(file, vma);
+ if (!err)
+ vma->vm_ops = &vboxsf_file_vm_ops;
+
+ return err;
+}
+
+/*
+ * Note that since we are accessing files on the host's filesystem, files
+ * may always be changed underneath us by the host!
+ *
+ * The vboxsf API between the guest and the host does not offer any functions
+ * to deal with this. There is no inode-generation to check for changes, no
+ * events / callback on changes and no way to lock files.
+ *
+ * To avoid returning stale data when a file gets *opened* on our (the guest)
+ * side, we do a "stat" on the host side, then compare the mtime with the
+ * last known mtime and invalidate the page-cache if they differ.
+ * This is done from vboxsf_inode_revalidate().
+ *
+ * When reads are done through the read_iter fop, it is possible to do
+ * further cache revalidation then, there are 3 options to deal with this:
+ *
+ * 1) Rely solely on the revalidation done at open time
+ * 2) Do another "stat" and compare mtime again. Unfortunately the vboxsf
+ * host API does not allow stat on handles, so we would need to use
+ * file->f_path.dentry and the stat will then fail if the file was unlinked
+ * or renamed (and there is no thing like NFS' silly-rename). So we get:
+ * 2a) "stat" and compare mtime, on stat failure invalidate the cache
+ * 2b) "stat" and compare mtime, on stat failure do nothing
+ * 3) Simply always call invalidate_inode_pages2_range on the range of the read
+ *
+ * Currently we are keeping things KISS and using option 1. this allows
+ * directly using generic_file_read_iter without wrapping it.
+ *
+ * This means that only data written on the host side before open() on
+ * the guest side is guaranteed to be seen by the guest. If necessary
+ * we may provide other read-cache strategies in the future and make this
+ * configurable through a mount option.
+ */
+const struct file_operations vboxsf_reg_fops = {
+ .llseek = generic_file_llseek,
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
+ .mmap = vboxsf_file_mmap,
+ .open = vboxsf_file_open,
+ .release = vboxsf_file_release,
+ .fsync = noop_fsync,
+ .splice_read = generic_file_splice_read,
+};
+
+const struct inode_operations vboxsf_reg_iops = {
+ .getattr = vboxsf_getattr,
+ .setattr = vboxsf_setattr
+};
+
+static int vboxsf_readpage(struct file *file, struct page *page)
+{
+ struct vboxsf_handle *sf_handle = file->private_data;
+ loff_t off = page_offset(page);
+ u32 nread = PAGE_SIZE;
+ u8 *buf;
+ int err;
+
+ buf = kmap(page);
+
+ err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf);
+ if (err == 0) {
+ memset(&buf[nread], 0, PAGE_SIZE - nread);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ } else {
+ SetPageError(page);
+ }
+
+ kunmap(page);
+ unlock_page(page);
+ return err;
+}
+
+static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i)
+{
+ struct vboxsf_handle *h, *sf_handle = NULL;
+
+ mutex_lock(&sf_i->handle_list_mutex);
+ list_for_each_entry(h, &sf_i->handle_list, head) {
+ if (h->access_flags == SHFL_CF_ACCESS_WRITE ||
+ h->access_flags == SHFL_CF_ACCESS_READWRITE) {
+ kref_get(&h->refcount);
+ sf_handle = h;
+ break;
+ }
+ }
+ mutex_unlock(&sf_i->handle_list_mutex);
+
+ return sf_handle;
+}
+
+static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct inode *inode = page->mapping->host;
+ struct vboxsf_inode *sf_i = VBOXSF_I(inode);
+ struct vboxsf_handle *sf_handle;
+ loff_t off = page_offset(page);
+ loff_t size = i_size_read(inode);
+ u32 nwrite = PAGE_SIZE;
+ u8 *buf;
+ int err;
+
+ if (off + PAGE_SIZE > size)
+ nwrite = size & ~PAGE_MASK;
+
+ sf_handle = vboxsf_get_write_handle(sf_i);
+ if (!sf_handle)
+ return -EBADF;
+
+ buf = kmap(page);
+ err = vboxsf_write(sf_handle->root, sf_handle->handle,
+ off, &nwrite, buf);
+ kunmap(page);
+
+ kref_put(&sf_handle->refcount, vboxsf_handle_release);
+
+ if (err == 0) {
+ ClearPageError(page);
+ /* mtime changed */
+ sf_i->force_restat = 1;
+ } else {
+ ClearPageUptodate(page);
+ }
+
+ unlock_page(page);
+ return err;
+}
+
+static int vboxsf_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ struct vboxsf_handle *sf_handle = file->private_data;
+ unsigned int from = pos & ~PAGE_MASK;
+ u32 nwritten = len;
+ u8 *buf;
+ int err;
+
+ /* zero the stale part of the page if we did a short copy */
+ if (!PageUptodate(page) && copied < len)
+ zero_user(page, from + copied, len - copied);
+
+ buf = kmap(page);
+ err = vboxsf_write(sf_handle->root, sf_handle->handle,
+ pos, &nwritten, buf + from);
+ kunmap(page);
+
+ if (err) {
+ nwritten = 0;
+ goto out;
+ }
+
+ /* mtime changed */
+ VBOXSF_I(inode)->force_restat = 1;
+
+ if (!PageUptodate(page) && nwritten == PAGE_SIZE)
+ SetPageUptodate(page);
+
+ pos += nwritten;
+ if (pos > inode->i_size)
+ i_size_write(inode, pos);
+
+out:
+ unlock_page(page);
+ put_page(page);
+
+ return nwritten;
+}
+
+/*
+ * Note simple_write_begin does not read the page from disk on partial writes
+ * this is ok since vboxsf_write_end only writes the written parts of the
+ * page and it does not call SetPageUptodate for partial writes.
+ */
+const struct address_space_operations vboxsf_reg_aops = {
+ .readpage = vboxsf_readpage,
+ .writepage = vboxsf_writepage,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+ .write_begin = simple_write_begin,
+ .write_end = vboxsf_write_end,
+};
+
+static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode,
+ struct delayed_call *done)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
+ struct shfl_string *path;
+ char *link;
+ int err;
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ path = vboxsf_path_from_dentry(sbi, dentry);
+ if (IS_ERR(path))
+ return ERR_CAST(path);
+
+ link = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!link) {
+ __putname(path);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ err = vboxsf_readlink(sbi->root, path, PATH_MAX, link);
+ __putname(path);
+ if (err) {
+ kfree(link);
+ return ERR_PTR(err);
+ }
+
+ set_delayed_call(done, kfree_link, link);
+ return link;
+}
+
+const struct inode_operations vboxsf_lnk_iops = {
+ .get_link = vboxsf_get_link
+};
diff --git a/fs/vboxsf/shfl_hostintf.h b/fs/vboxsf/shfl_hostintf.h
new file mode 100644
index 000000000000..aca829062c12
--- /dev/null
+++ b/fs/vboxsf/shfl_hostintf.h
@@ -0,0 +1,901 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * VirtualBox Shared Folders: host interface definition.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#ifndef SHFL_HOSTINTF_H
+#define SHFL_HOSTINTF_H
+
+#include <linux/vbox_vmmdev_types.h>
+
+/* The max in/out buffer size for a FN_READ or FN_WRITE call */
+#define SHFL_MAX_RW_COUNT (16 * SZ_1M)
+
+/*
+ * Structures shared between guest and the service
+ * can be relocated and use offsets to point to variable
+ * length parts.
+ *
+ * Shared folders protocol works with handles.
+ * Before doing any action on a file system object,
+ * one have to obtain the object handle via a SHFL_FN_CREATE
+ * request. A handle must be closed with SHFL_FN_CLOSE.
+ */
+
+enum {
+ SHFL_FN_QUERY_MAPPINGS = 1, /* Query mappings changes. */
+ SHFL_FN_QUERY_MAP_NAME = 2, /* Query map name. */
+ SHFL_FN_CREATE = 3, /* Open/create object. */
+ SHFL_FN_CLOSE = 4, /* Close object handle. */
+ SHFL_FN_READ = 5, /* Read object content. */
+ SHFL_FN_WRITE = 6, /* Write new object content. */
+ SHFL_FN_LOCK = 7, /* Lock/unlock a range in the object. */
+ SHFL_FN_LIST = 8, /* List object content. */
+ SHFL_FN_INFORMATION = 9, /* Query/set object information. */
+ /* Note function number 10 is not used! */
+ SHFL_FN_REMOVE = 11, /* Remove object */
+ SHFL_FN_MAP_FOLDER_OLD = 12, /* Map folder (legacy) */
+ SHFL_FN_UNMAP_FOLDER = 13, /* Unmap folder */
+ SHFL_FN_RENAME = 14, /* Rename object */
+ SHFL_FN_FLUSH = 15, /* Flush file */
+ SHFL_FN_SET_UTF8 = 16, /* Select UTF8 filename encoding */
+ SHFL_FN_MAP_FOLDER = 17, /* Map folder */
+ SHFL_FN_READLINK = 18, /* Read symlink dest (as of VBox 4.0) */
+ SHFL_FN_SYMLINK = 19, /* Create symlink (as of VBox 4.0) */
+ SHFL_FN_SET_SYMLINKS = 20, /* Ask host to show symlinks (4.0+) */
+};
+
+/* Root handles for a mapping are of type u32, Root handles are unique. */
+#define SHFL_ROOT_NIL UINT_MAX
+
+/* Shared folders handle for an opened object are of type u64. */
+#define SHFL_HANDLE_NIL ULLONG_MAX
+
+/* Hardcoded maximum length (in chars) of a shared folder name. */
+#define SHFL_MAX_LEN (256)
+/* Hardcoded maximum number of shared folder mapping available to the guest. */
+#define SHFL_MAX_MAPPINGS (64)
+
+/** Shared folder string buffer structure. */
+struct shfl_string {
+ /** Allocated size of the string member in bytes. */
+ u16 size;
+
+ /** Length of string without trailing nul in bytes. */
+ u16 length;
+
+ /** UTF-8 or UTF-16 string. Nul terminated. */
+ union {
+ u8 utf8[2];
+ u16 utf16[1];
+ u16 ucs2[1]; /* misnomer, use utf16. */
+ } string;
+};
+VMMDEV_ASSERT_SIZE(shfl_string, 6);
+
+/* The size of shfl_string w/o the string part. */
+#define SHFLSTRING_HEADER_SIZE 4
+
+/* Calculate size of the string. */
+static inline u32 shfl_string_buf_size(const struct shfl_string *string)
+{
+ return string ? SHFLSTRING_HEADER_SIZE + string->size : 0;
+}
+
+/* Set user id on execution (S_ISUID). */
+#define SHFL_UNIX_ISUID 0004000U
+/* Set group id on execution (S_ISGID). */
+#define SHFL_UNIX_ISGID 0002000U
+/* Sticky bit (S_ISVTX / S_ISTXT). */
+#define SHFL_UNIX_ISTXT 0001000U
+
+/* Owner readable (S_IRUSR). */
+#define SHFL_UNIX_IRUSR 0000400U
+/* Owner writable (S_IWUSR). */
+#define SHFL_UNIX_IWUSR 0000200U
+/* Owner executable (S_IXUSR). */
+#define SHFL_UNIX_IXUSR 0000100U
+
+/* Group readable (S_IRGRP). */
+#define SHFL_UNIX_IRGRP 0000040U
+/* Group writable (S_IWGRP). */
+#define SHFL_UNIX_IWGRP 0000020U
+/* Group executable (S_IXGRP). */
+#define SHFL_UNIX_IXGRP 0000010U
+
+/* Other readable (S_IROTH). */
+#define SHFL_UNIX_IROTH 0000004U
+/* Other writable (S_IWOTH). */
+#define SHFL_UNIX_IWOTH 0000002U
+/* Other executable (S_IXOTH). */
+#define SHFL_UNIX_IXOTH 0000001U
+
+/* Named pipe (fifo) (S_IFIFO). */
+#define SHFL_TYPE_FIFO 0010000U
+/* Character device (S_IFCHR). */
+#define SHFL_TYPE_DEV_CHAR 0020000U
+/* Directory (S_IFDIR). */
+#define SHFL_TYPE_DIRECTORY 0040000U
+/* Block device (S_IFBLK). */
+#define SHFL_TYPE_DEV_BLOCK 0060000U
+/* Regular file (S_IFREG). */
+#define SHFL_TYPE_FILE 0100000U
+/* Symbolic link (S_IFLNK). */
+#define SHFL_TYPE_SYMLINK 0120000U
+/* Socket (S_IFSOCK). */
+#define SHFL_TYPE_SOCKET 0140000U
+/* Whiteout (S_IFWHT). */
+#define SHFL_TYPE_WHITEOUT 0160000U
+/* Type mask (S_IFMT). */
+#define SHFL_TYPE_MASK 0170000U
+
+/* Checks the mode flags indicate a directory (S_ISDIR). */
+#define SHFL_IS_DIRECTORY(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_DIRECTORY)
+/* Checks the mode flags indicate a symbolic link (S_ISLNK). */
+#define SHFL_IS_SYMLINK(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_SYMLINK)
+
+/** The available additional information in a shfl_fsobjattr object. */
+enum shfl_fsobjattr_add {
+ /** No additional information is available / requested. */
+ SHFLFSOBJATTRADD_NOTHING = 1,
+ /**
+ * The additional unix attributes (shfl_fsobjattr::u::unix_attr) are
+ * available / requested.
+ */
+ SHFLFSOBJATTRADD_UNIX,
+ /**
+ * The additional extended attribute size (shfl_fsobjattr::u::size) is
+ * available / requested.
+ */
+ SHFLFSOBJATTRADD_EASIZE,
+ /**
+ * The last valid item (inclusive).
+ * The valid range is SHFLFSOBJATTRADD_NOTHING thru
+ * SHFLFSOBJATTRADD_LAST.
+ */
+ SHFLFSOBJATTRADD_LAST = SHFLFSOBJATTRADD_EASIZE,
+
+ /** The usual 32-bit hack. */
+ SHFLFSOBJATTRADD_32BIT_SIZE_HACK = 0x7fffffff
+};
+
+/**
+ * Additional unix Attributes, these are available when
+ * shfl_fsobjattr.additional == SHFLFSOBJATTRADD_UNIX.
+ */
+struct shfl_fsobjattr_unix {
+ /**
+ * The user owning the filesystem object (st_uid).
+ * This field is ~0U if not supported.
+ */
+ u32 uid;
+
+ /**
+ * The group the filesystem object is assigned (st_gid).
+ * This field is ~0U if not supported.
+ */
+ u32 gid;
+
+ /**
+ * Number of hard links to this filesystem object (st_nlink).
+ * This field is 1 if the filesystem doesn't support hardlinking or
+ * the information isn't available.
+ */
+ u32 hardlinks;
+
+ /**
+ * The device number of the device which this filesystem object resides
+ * on (st_dev). This field is 0 if this information is not available.
+ */
+ u32 inode_id_device;
+
+ /**
+ * The unique identifier (within the filesystem) of this filesystem
+ * object (st_ino). Together with inode_id_device, this field can be
+ * used as a OS wide unique id, when both their values are not 0.
+ * This field is 0 if the information is not available.
+ */
+ u64 inode_id;
+
+ /**
+ * User flags (st_flags).
+ * This field is 0 if this information is not available.
+ */
+ u32 flags;
+
+ /**
+ * The current generation number (st_gen).
+ * This field is 0 if this information is not available.
+ */
+ u32 generation_id;
+
+ /**
+ * The device number of a char. or block device type object (st_rdev).
+ * This field is 0 if the file isn't a char. or block device or when
+ * the OS doesn't use the major+minor device idenfication scheme.
+ */
+ u32 device;
+} __packed;
+
+/** Extended attribute size. */
+struct shfl_fsobjattr_easize {
+ /** Size of EAs. */
+ s64 cb;
+} __packed;
+
+/** Shared folder filesystem object attributes. */
+struct shfl_fsobjattr {
+ /** Mode flags (st_mode). SHFL_UNIX_*, SHFL_TYPE_*, and SHFL_DOS_*. */
+ u32 mode;
+
+ /** The additional attributes available. */
+ enum shfl_fsobjattr_add additional;
+
+ /**
+ * Additional attributes.
+ *
+ * Unless explicitly specified to an API, the API can provide additional
+ * data as it is provided by the underlying OS.
+ */
+ union {
+ struct shfl_fsobjattr_unix unix_attr;
+ struct shfl_fsobjattr_easize size;
+ } __packed u;
+} __packed;
+VMMDEV_ASSERT_SIZE(shfl_fsobjattr, 44);
+
+struct shfl_timespec {
+ s64 ns_relative_to_unix_epoch;
+};
+
+/** Filesystem object information structure. */
+struct shfl_fsobjinfo {
+ /**
+ * Logical size (st_size).
+ * For normal files this is the size of the file.
+ * For symbolic links, this is the length of the path name contained
+ * in the symbolic link.
+ * For other objects this fields needs to be specified.
+ */
+ s64 size;
+
+ /** Disk allocation size (st_blocks * DEV_BSIZE). */
+ s64 allocated;
+
+ /** Time of last access (st_atime). */
+ struct shfl_timespec access_time;
+
+ /** Time of last data modification (st_mtime). */
+ struct shfl_timespec modification_time;
+
+ /**
+ * Time of last status change (st_ctime).
+ * If not available this is set to modification_time.
+ */
+ struct shfl_timespec change_time;
+
+ /**
+ * Time of file birth (st_birthtime).
+ * If not available this is set to change_time.
+ */
+ struct shfl_timespec birth_time;
+
+ /** Attributes. */
+ struct shfl_fsobjattr attr;
+
+} __packed;
+VMMDEV_ASSERT_SIZE(shfl_fsobjinfo, 92);
+
+/**
+ * result of an open/create request.
+ * Along with handle value the result code
+ * identifies what has happened while
+ * trying to open the object.
+ */
+enum shfl_create_result {
+ SHFL_NO_RESULT,
+ /** Specified path does not exist. */
+ SHFL_PATH_NOT_FOUND,
+ /** Path to file exists, but the last component does not. */
+ SHFL_FILE_NOT_FOUND,
+ /** File already exists and either has been opened or not. */
+ SHFL_FILE_EXISTS,
+ /** New file was created. */
+ SHFL_FILE_CREATED,
+ /** Existing file was replaced or overwritten. */
+ SHFL_FILE_REPLACED
+};
+
+/* No flags. Initialization value. */
+#define SHFL_CF_NONE (0x00000000)
+
+/*
+ * Only lookup the object, do not return a handle. When this is set all other
+ * flags are ignored.
+ */
+#define SHFL_CF_LOOKUP (0x00000001)
+
+/*
+ * Open parent directory of specified object.
+ * Useful for the corresponding Windows FSD flag
+ * and for opening paths like \\dir\\*.* to search the 'dir'.
+ */
+#define SHFL_CF_OPEN_TARGET_DIRECTORY (0x00000002)
+
+/* Create/open a directory. */
+#define SHFL_CF_DIRECTORY (0x00000004)
+
+/*
+ * Open/create action to do if object exists
+ * and if the object does not exists.
+ * REPLACE file means atomically DELETE and CREATE.
+ * OVERWRITE file means truncating the file to 0 and
+ * setting new size.
+ * When opening an existing directory REPLACE and OVERWRITE
+ * actions are considered invalid, and cause returning
+ * FILE_EXISTS with NIL handle.
+ */
+#define SHFL_CF_ACT_MASK_IF_EXISTS (0x000000f0)
+#define SHFL_CF_ACT_MASK_IF_NEW (0x00000f00)
+
+/* What to do if object exists. */
+#define SHFL_CF_ACT_OPEN_IF_EXISTS (0x00000000)
+#define SHFL_CF_ACT_FAIL_IF_EXISTS (0x00000010)
+#define SHFL_CF_ACT_REPLACE_IF_EXISTS (0x00000020)
+#define SHFL_CF_ACT_OVERWRITE_IF_EXISTS (0x00000030)
+
+/* What to do if object does not exist. */
+#define SHFL_CF_ACT_CREATE_IF_NEW (0x00000000)
+#define SHFL_CF_ACT_FAIL_IF_NEW (0x00000100)
+
+/* Read/write requested access for the object. */
+#define SHFL_CF_ACCESS_MASK_RW (0x00003000)
+
+/* No access requested. */
+#define SHFL_CF_ACCESS_NONE (0x00000000)
+/* Read access requested. */
+#define SHFL_CF_ACCESS_READ (0x00001000)
+/* Write access requested. */
+#define SHFL_CF_ACCESS_WRITE (0x00002000)
+/* Read/Write access requested. */
+#define SHFL_CF_ACCESS_READWRITE (0x00003000)
+
+/* Requested share access for the object. */
+#define SHFL_CF_ACCESS_MASK_DENY (0x0000c000)
+
+/* Allow any access. */
+#define SHFL_CF_ACCESS_DENYNONE (0x00000000)
+/* Do not allow read. */
+#define SHFL_CF_ACCESS_DENYREAD (0x00004000)
+/* Do not allow write. */
+#define SHFL_CF_ACCESS_DENYWRITE (0x00008000)
+/* Do not allow access. */
+#define SHFL_CF_ACCESS_DENYALL (0x0000c000)
+
+/* Requested access to attributes of the object. */
+#define SHFL_CF_ACCESS_MASK_ATTR (0x00030000)
+
+/* No access requested. */
+#define SHFL_CF_ACCESS_ATTR_NONE (0x00000000)
+/* Read access requested. */
+#define SHFL_CF_ACCESS_ATTR_READ (0x00010000)
+/* Write access requested. */
+#define SHFL_CF_ACCESS_ATTR_WRITE (0x00020000)
+/* Read/Write access requested. */
+#define SHFL_CF_ACCESS_ATTR_READWRITE (0x00030000)
+
+/*
+ * The file is opened in append mode.
+ * Ignored if SHFL_CF_ACCESS_WRITE is not set.
+ */
+#define SHFL_CF_ACCESS_APPEND (0x00040000)
+
+/** Create parameters buffer struct for SHFL_FN_CREATE call */
+struct shfl_createparms {
+ /** Returned handle of opened object. */
+ u64 handle;
+
+ /** Returned result of the operation */
+ enum shfl_create_result result;
+
+ /** SHFL_CF_* */
+ u32 create_flags;
+
+ /**
+ * Attributes of object to create and
+ * returned actual attributes of opened/created object.
+ */
+ struct shfl_fsobjinfo info;
+} __packed;
+
+/** Shared Folder directory information */
+struct shfl_dirinfo {
+ /** Full information about the object. */
+ struct shfl_fsobjinfo info;
+ /**
+ * The length of the short field (number of UTF16 chars).
+ * It is 16-bit for reasons of alignment.
+ */
+ u16 short_name_len;
+ /**
+ * The short name for 8.3 compatibility.
+ * Empty string if not available.
+ */
+ u16 short_name[14];
+ struct shfl_string name;
+};
+
+/** Shared folder filesystem properties. */
+struct shfl_fsproperties {
+ /**
+ * The maximum size of a filesystem object name.
+ * This does not include the '\\0'.
+ */
+ u32 max_component_len;
+
+ /**
+ * True if the filesystem is remote.
+ * False if the filesystem is local.
+ */
+ bool remote;
+
+ /**
+ * True if the filesystem is case sensitive.
+ * False if the filesystem is case insensitive.
+ */
+ bool case_sensitive;
+
+ /**
+ * True if the filesystem is mounted read only.
+ * False if the filesystem is mounted read write.
+ */
+ bool read_only;
+
+ /**
+ * True if the filesystem can encode unicode object names.
+ * False if it can't.
+ */
+ bool supports_unicode;
+
+ /**
+ * True if the filesystem is compresses.
+ * False if it isn't or we don't know.
+ */
+ bool compressed;
+
+ /**
+ * True if the filesystem compresses of individual files.
+ * False if it doesn't or we don't know.
+ */
+ bool file_compression;
+};
+VMMDEV_ASSERT_SIZE(shfl_fsproperties, 12);
+
+struct shfl_volinfo {
+ s64 total_allocation_bytes;
+ s64 available_allocation_bytes;
+ u32 bytes_per_allocation_unit;
+ u32 bytes_per_sector;
+ u32 serial;
+ struct shfl_fsproperties properties;
+};
+
+
+/** SHFL_FN_MAP_FOLDER Parameters structure. */
+struct shfl_map_folder {
+ /**
+ * pointer, in:
+ * Points to struct shfl_string buffer.
+ */
+ struct vmmdev_hgcm_function_parameter path;
+
+ /**
+ * pointer, out: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * pointer, in: UTF16
+ * Path delimiter
+ */
+ struct vmmdev_hgcm_function_parameter delimiter;
+
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Case senstive flag
+ */
+ struct vmmdev_hgcm_function_parameter case_sensitive;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_MAP_FOLDER (4)
+
+
+/** SHFL_FN_UNMAP_FOLDER Parameters structure. */
+struct shfl_unmap_folder {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_UNMAP_FOLDER (1)
+
+
+/** SHFL_FN_CREATE Parameters structure. */
+struct shfl_create {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * pointer, in:
+ * Points to struct shfl_string buffer.
+ */
+ struct vmmdev_hgcm_function_parameter path;
+
+ /**
+ * pointer, in/out:
+ * Points to struct shfl_createparms buffer.
+ */
+ struct vmmdev_hgcm_function_parameter parms;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_CREATE (3)
+
+
+/** SHFL_FN_CLOSE Parameters structure. */
+struct shfl_close {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * value64, in:
+ * SHFLHANDLE (u64) of object to close.
+ */
+ struct vmmdev_hgcm_function_parameter handle;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_CLOSE (2)
+
+
+/** SHFL_FN_READ Parameters structure. */
+struct shfl_read {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * value64, in:
+ * SHFLHANDLE (u64) of object to read from.
+ */
+ struct vmmdev_hgcm_function_parameter handle;
+
+ /**
+ * value64, in:
+ * Offset to read from.
+ */
+ struct vmmdev_hgcm_function_parameter offset;
+
+ /**
+ * value64, in/out:
+ * Bytes to read/How many were read.
+ */
+ struct vmmdev_hgcm_function_parameter cb;
+
+ /**
+ * pointer, out:
+ * Buffer to place data to.
+ */
+ struct vmmdev_hgcm_function_parameter buffer;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_READ (5)
+
+
+/** SHFL_FN_WRITE Parameters structure. */
+struct shfl_write {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * value64, in:
+ * SHFLHANDLE (u64) of object to write to.
+ */
+ struct vmmdev_hgcm_function_parameter handle;
+
+ /**
+ * value64, in:
+ * Offset to write to.
+ */
+ struct vmmdev_hgcm_function_parameter offset;
+
+ /**
+ * value64, in/out:
+ * Bytes to write/How many were written.
+ */
+ struct vmmdev_hgcm_function_parameter cb;
+
+ /**
+ * pointer, in:
+ * Data to write.
+ */
+ struct vmmdev_hgcm_function_parameter buffer;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_WRITE (5)
+
+
+/*
+ * SHFL_FN_LIST
+ * Listing information includes variable length RTDIRENTRY[EX] structures.
+ */
+
+#define SHFL_LIST_NONE 0
+#define SHFL_LIST_RETURN_ONE 1
+
+/** SHFL_FN_LIST Parameters structure. */
+struct shfl_list {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * value64, in:
+ * SHFLHANDLE (u64) of object to be listed.
+ */
+ struct vmmdev_hgcm_function_parameter handle;
+
+ /**
+ * value32, in:
+ * List flags SHFL_LIST_*.
+ */
+ struct vmmdev_hgcm_function_parameter flags;
+
+ /**
+ * value32, in/out:
+ * Bytes to be used for listing information/How many bytes were used.
+ */
+ struct vmmdev_hgcm_function_parameter cb;
+
+ /**
+ * pointer, in/optional
+ * Points to struct shfl_string buffer that specifies a search path.
+ */
+ struct vmmdev_hgcm_function_parameter path;
+
+ /**
+ * pointer, out:
+ * Buffer to place listing information to. (struct shfl_dirinfo)
+ */
+ struct vmmdev_hgcm_function_parameter buffer;
+
+ /**
+ * value32, in/out:
+ * Indicates a key where the listing must be resumed.
+ * in: 0 means start from begin of object.
+ * out: 0 means listing completed.
+ */
+ struct vmmdev_hgcm_function_parameter resume_point;
+
+ /**
+ * pointer, out:
+ * Number of files returned
+ */
+ struct vmmdev_hgcm_function_parameter file_count;
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_LIST (8)
+
+
+/** SHFL_FN_READLINK Parameters structure. */
+struct shfl_readLink {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * pointer, in:
+ * Points to struct shfl_string buffer.
+ */
+ struct vmmdev_hgcm_function_parameter path;
+
+ /**
+ * pointer, out:
+ * Buffer to place data to.
+ */
+ struct vmmdev_hgcm_function_parameter buffer;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_READLINK (3)
+
+
+/* SHFL_FN_INFORMATION */
+
+/* Mask of Set/Get bit. */
+#define SHFL_INFO_MODE_MASK (0x1)
+/* Get information */
+#define SHFL_INFO_GET (0x0)
+/* Set information */
+#define SHFL_INFO_SET (0x1)
+
+/* Get name of the object. */
+#define SHFL_INFO_NAME (0x2)
+/* Set size of object (extend/trucate); only applies to file objects */
+#define SHFL_INFO_SIZE (0x4)
+/* Get/Set file object info. */
+#define SHFL_INFO_FILE (0x8)
+/* Get volume information. */
+#define SHFL_INFO_VOLUME (0x10)
+
+/** SHFL_FN_INFORMATION Parameters structure. */
+struct shfl_information {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * value64, in:
+ * SHFLHANDLE (u64) of object to be listed.
+ */
+ struct vmmdev_hgcm_function_parameter handle;
+
+ /**
+ * value32, in:
+ * SHFL_INFO_*
+ */
+ struct vmmdev_hgcm_function_parameter flags;
+
+ /**
+ * value32, in/out:
+ * Bytes to be used for information/How many bytes were used.
+ */
+ struct vmmdev_hgcm_function_parameter cb;
+
+ /**
+ * pointer, in/out:
+ * Information to be set/get (shfl_fsobjinfo or shfl_string). Do not
+ * forget to set the shfl_fsobjinfo::attr::additional for a get
+ * operation as well.
+ */
+ struct vmmdev_hgcm_function_parameter info;
+
+};
+
+/* Number of parameters */
+#define SHFL_CPARMS_INFORMATION (5)
+
+
+/* SHFL_FN_REMOVE */
+
+#define SHFL_REMOVE_FILE (0x1)
+#define SHFL_REMOVE_DIR (0x2)
+#define SHFL_REMOVE_SYMLINK (0x4)
+
+/** SHFL_FN_REMOVE Parameters structure. */
+struct shfl_remove {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * pointer, in:
+ * Points to struct shfl_string buffer.
+ */
+ struct vmmdev_hgcm_function_parameter path;
+
+ /**
+ * value32, in:
+ * remove flags (file/directory)
+ */
+ struct vmmdev_hgcm_function_parameter flags;
+
+};
+
+#define SHFL_CPARMS_REMOVE (3)
+
+
+/* SHFL_FN_RENAME */
+
+#define SHFL_RENAME_FILE (0x1)
+#define SHFL_RENAME_DIR (0x2)
+#define SHFL_RENAME_REPLACE_IF_EXISTS (0x4)
+
+/** SHFL_FN_RENAME Parameters structure. */
+struct shfl_rename {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * pointer, in:
+ * Points to struct shfl_string src.
+ */
+ struct vmmdev_hgcm_function_parameter src;
+
+ /**
+ * pointer, in:
+ * Points to struct shfl_string dest.
+ */
+ struct vmmdev_hgcm_function_parameter dest;
+
+ /**
+ * value32, in:
+ * rename flags (file/directory)
+ */
+ struct vmmdev_hgcm_function_parameter flags;
+
+};
+
+#define SHFL_CPARMS_RENAME (4)
+
+
+/** SHFL_FN_SYMLINK Parameters structure. */
+struct shfl_symlink {
+ /**
+ * pointer, in: SHFLROOT (u32)
+ * Root handle of the mapping which name is queried.
+ */
+ struct vmmdev_hgcm_function_parameter root;
+
+ /**
+ * pointer, in:
+ * Points to struct shfl_string of path for the new symlink.
+ */
+ struct vmmdev_hgcm_function_parameter new_path;
+
+ /**
+ * pointer, in:
+ * Points to struct shfl_string of destination for symlink.
+ */
+ struct vmmdev_hgcm_function_parameter old_path;
+
+ /**
+ * pointer, out:
+ * Information about created symlink.
+ */
+ struct vmmdev_hgcm_function_parameter info;
+
+};
+
+#define SHFL_CPARMS_SYMLINK (4)
+
+#endif
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
new file mode 100644
index 000000000000..675e26989376
--- /dev/null
+++ b/fs/vboxsf/super.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: MIT
+/*
+ * VirtualBox Guest Shared Folders support: Virtual File System.
+ *
+ * Module initialization/finalization
+ * File system registration/deregistration
+ * Superblock reading
+ * Few utility functions
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/idr.h>
+#include <linux/fs_parser.h>
+#include <linux/magic.h>
+#include <linux/module.h>
+#include <linux/nls.h>
+#include <linux/statfs.h>
+#include <linux/vbox_utils.h>
+#include "vfsmod.h"
+
+#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */
+
+#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000')
+#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377')
+#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376')
+#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375')
+
+static int follow_symlinks;
+module_param(follow_symlinks, int, 0444);
+MODULE_PARM_DESC(follow_symlinks,
+ "Let host resolve symlinks rather than showing them");
+
+static DEFINE_IDA(vboxsf_bdi_ida);
+static DEFINE_MUTEX(vboxsf_setup_mutex);
+static bool vboxsf_setup_done;
+static struct super_operations vboxsf_super_ops; /* forward declaration */
+static struct kmem_cache *vboxsf_inode_cachep;
+
+static char * const vboxsf_default_nls = CONFIG_NLS_DEFAULT;
+
+enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode,
+ opt_dmask, opt_fmask };
+
+static const struct fs_parameter_spec vboxsf_fs_parameters[] = {
+ fsparam_string ("nls", opt_nls),
+ fsparam_u32 ("uid", opt_uid),
+ fsparam_u32 ("gid", opt_gid),
+ fsparam_u32 ("ttl", opt_ttl),
+ fsparam_u32oct ("dmode", opt_dmode),
+ fsparam_u32oct ("fmode", opt_fmode),
+ fsparam_u32oct ("dmask", opt_dmask),
+ fsparam_u32oct ("fmask", opt_fmask),
+ {}
+};
+
+static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct vboxsf_fs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ kuid_t uid;
+ kgid_t gid;
+ int opt;
+
+ opt = fs_parse(fc, vboxsf_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case opt_nls:
+ if (ctx->nls_name || fc->purpose != FS_CONTEXT_FOR_MOUNT) {
+ vbg_err("vboxsf: Cannot reconfigure nls option\n");
+ return -EINVAL;
+ }
+ ctx->nls_name = param->string;
+ param->string = NULL;
+ break;
+ case opt_uid:
+ uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(uid))
+ return -EINVAL;
+ ctx->o.uid = uid;
+ break;
+ case opt_gid:
+ gid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(gid))
+ return -EINVAL;
+ ctx->o.gid = gid;
+ break;
+ case opt_ttl:
+ ctx->o.ttl = msecs_to_jiffies(result.uint_32);
+ break;
+ case opt_dmode:
+ if (result.uint_32 & ~0777)
+ return -EINVAL;
+ ctx->o.dmode = result.uint_32;
+ ctx->o.dmode_set = true;
+ break;
+ case opt_fmode:
+ if (result.uint_32 & ~0777)
+ return -EINVAL;
+ ctx->o.fmode = result.uint_32;
+ ctx->o.fmode_set = true;
+ break;
+ case opt_dmask:
+ if (result.uint_32 & ~07777)
+ return -EINVAL;
+ ctx->o.dmask = result.uint_32;
+ break;
+ case opt_fmask:
+ if (result.uint_32 & ~07777)
+ return -EINVAL;
+ ctx->o.fmask = result.uint_32;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+ struct vboxsf_fs_context *ctx = fc->fs_private;
+ struct shfl_string *folder_name, root_path;
+ struct vboxsf_sbi *sbi;
+ struct dentry *droot;
+ struct inode *iroot;
+ char *nls_name;
+ size_t size;
+ int err;
+
+ if (!fc->source)
+ return -EINVAL;
+
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+
+ sbi->o = ctx->o;
+ idr_init(&sbi->ino_idr);
+ spin_lock_init(&sbi->ino_idr_lock);
+ sbi->next_generation = 1;
+ sbi->bdi_id = -1;
+
+ /* Load nls if not utf8 */
+ nls_name = ctx->nls_name ? ctx->nls_name : vboxsf_default_nls;
+ if (strcmp(nls_name, "utf8") != 0) {
+ if (nls_name == vboxsf_default_nls)
+ sbi->nls = load_nls_default();
+ else
+ sbi->nls = load_nls(nls_name);
+
+ if (!sbi->nls) {
+ vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
+ err = -EINVAL;
+ goto fail_free;
+ }
+ }
+
+ sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL);
+ if (sbi->bdi_id < 0) {
+ err = sbi->bdi_id;
+ goto fail_free;
+ }
+
+ err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id);
+ if (err)
+ goto fail_free;
+
+ /* Turn source into a shfl_string and map the folder */
+ size = strlen(fc->source) + 1;
+ folder_name = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL);
+ if (!folder_name) {
+ err = -ENOMEM;
+ goto fail_free;
+ }
+ folder_name->size = size;
+ folder_name->length = size - 1;
+ strlcpy(folder_name->string.utf8, fc->source, size);
+ err = vboxsf_map_folder(folder_name, &sbi->root);
+ kfree(folder_name);
+ if (err) {
+ vbg_err("vboxsf: Host rejected mount of '%s' with error %d\n",
+ fc->source, err);
+ goto fail_free;
+ }
+
+ root_path.length = 1;
+ root_path.size = 2;
+ root_path.string.utf8[0] = '/';
+ root_path.string.utf8[1] = 0;
+ err = vboxsf_stat(sbi, &root_path, &sbi->root_info);
+ if (err)
+ goto fail_unmap;
+
+ sb->s_magic = VBOXSF_SUPER_MAGIC;
+ sb->s_blocksize = 1024;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_op = &vboxsf_super_ops;
+ sb->s_d_op = &vboxsf_dentry_ops;
+
+ iroot = iget_locked(sb, 0);
+ if (!iroot) {
+ err = -ENOMEM;
+ goto fail_unmap;
+ }
+ vboxsf_init_inode(sbi, iroot, &sbi->root_info);
+ unlock_new_inode(iroot);
+
+ droot = d_make_root(iroot);
+ if (!droot) {
+ err = -ENOMEM;
+ goto fail_unmap;
+ }
+
+ sb->s_root = droot;
+ sb->s_fs_info = sbi;
+ return 0;
+
+fail_unmap:
+ vboxsf_unmap_folder(sbi->root);
+fail_free:
+ if (sbi->bdi_id >= 0)
+ ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ if (sbi->nls)
+ unload_nls(sbi->nls);
+ idr_destroy(&sbi->ino_idr);
+ kfree(sbi);
+ return err;
+}
+
+static void vboxsf_inode_init_once(void *data)
+{
+ struct vboxsf_inode *sf_i = data;
+
+ mutex_init(&sf_i->handle_list_mutex);
+ inode_init_once(&sf_i->vfs_inode);
+}
+
+static struct inode *vboxsf_alloc_inode(struct super_block *sb)
+{
+ struct vboxsf_inode *sf_i;
+
+ sf_i = kmem_cache_alloc(vboxsf_inode_cachep, GFP_NOFS);
+ if (!sf_i)
+ return NULL;
+
+ sf_i->force_restat = 0;
+ INIT_LIST_HEAD(&sf_i->handle_list);
+
+ return &sf_i->vfs_inode;
+}
+
+static void vboxsf_free_inode(struct inode *inode)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->ino_idr_lock, flags);
+ idr_remove(&sbi->ino_idr, inode->i_ino);
+ spin_unlock_irqrestore(&sbi->ino_idr_lock, flags);
+ kmem_cache_free(vboxsf_inode_cachep, VBOXSF_I(inode));
+}
+
+static void vboxsf_put_super(struct super_block *sb)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(sb);
+
+ vboxsf_unmap_folder(sbi->root);
+ if (sbi->bdi_id >= 0)
+ ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+ if (sbi->nls)
+ unload_nls(sbi->nls);
+
+ /*
+ * vboxsf_free_inode uses the idr, make sure all delayed rcu free
+ * inodes are flushed.
+ */
+ rcu_barrier();
+ idr_destroy(&sbi->ino_idr);
+ kfree(sbi);
+}
+
+static int vboxsf_statfs(struct dentry *dentry, struct kstatfs *stat)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct shfl_volinfo shfl_volinfo;
+ struct vboxsf_sbi *sbi;
+ u32 buf_len;
+ int err;
+
+ sbi = VBOXSF_SBI(sb);
+ buf_len = sizeof(shfl_volinfo);
+ err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME,
+ &buf_len, &shfl_volinfo);
+ if (err)
+ return err;
+
+ stat->f_type = VBOXSF_SUPER_MAGIC;
+ stat->f_bsize = shfl_volinfo.bytes_per_allocation_unit;
+
+ do_div(shfl_volinfo.total_allocation_bytes,
+ shfl_volinfo.bytes_per_allocation_unit);
+ stat->f_blocks = shfl_volinfo.total_allocation_bytes;
+
+ do_div(shfl_volinfo.available_allocation_bytes,
+ shfl_volinfo.bytes_per_allocation_unit);
+ stat->f_bfree = shfl_volinfo.available_allocation_bytes;
+ stat->f_bavail = shfl_volinfo.available_allocation_bytes;
+
+ stat->f_files = 1000;
+ /*
+ * Don't return 0 here since the guest may then think that it is not
+ * possible to create any more files.
+ */
+ stat->f_ffree = 1000000;
+ stat->f_fsid.val[0] = 0;
+ stat->f_fsid.val[1] = 0;
+ stat->f_namelen = 255;
+ return 0;
+}
+
+static struct super_operations vboxsf_super_ops = {
+ .alloc_inode = vboxsf_alloc_inode,
+ .free_inode = vboxsf_free_inode,
+ .put_super = vboxsf_put_super,
+ .statfs = vboxsf_statfs,
+};
+
+static int vboxsf_setup(void)
+{
+ int err;
+
+ mutex_lock(&vboxsf_setup_mutex);
+
+ if (vboxsf_setup_done)
+ goto success;
+
+ vboxsf_inode_cachep =
+ kmem_cache_create("vboxsf_inode_cache",
+ sizeof(struct vboxsf_inode), 0,
+ (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD |
+ SLAB_ACCOUNT),
+ vboxsf_inode_init_once);
+ if (!vboxsf_inode_cachep) {
+ err = -ENOMEM;
+ goto fail_nomem;
+ }
+
+ err = vboxsf_connect();
+ if (err) {
+ vbg_err("vboxsf: err %d connecting to guest PCI-device\n", err);
+ vbg_err("vboxsf: make sure you are inside a VirtualBox VM\n");
+ vbg_err("vboxsf: and check dmesg for vboxguest errors\n");
+ goto fail_free_cache;
+ }
+
+ err = vboxsf_set_utf8();
+ if (err) {
+ vbg_err("vboxsf_setutf8 error %d\n", err);
+ goto fail_disconnect;
+ }
+
+ if (!follow_symlinks) {
+ err = vboxsf_set_symlinks();
+ if (err)
+ vbg_warn("vboxsf: Unable to show symlinks: %d\n", err);
+ }
+
+ vboxsf_setup_done = true;
+success:
+ mutex_unlock(&vboxsf_setup_mutex);
+ return 0;
+
+fail_disconnect:
+ vboxsf_disconnect();
+fail_free_cache:
+ kmem_cache_destroy(vboxsf_inode_cachep);
+fail_nomem:
+ mutex_unlock(&vboxsf_setup_mutex);
+ return err;
+}
+
+static int vboxsf_parse_monolithic(struct fs_context *fc, void *data)
+{
+ char *options = data;
+
+ if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 &&
+ options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 &&
+ options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 &&
+ options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) {
+ vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n");
+ return -EINVAL;
+ }
+
+ return generic_parse_monolithic(fc, data);
+}
+
+static int vboxsf_get_tree(struct fs_context *fc)
+{
+ int err;
+
+ err = vboxsf_setup();
+ if (err)
+ return err;
+
+ return get_tree_nodev(fc, vboxsf_fill_super);
+}
+
+static int vboxsf_reconfigure(struct fs_context *fc)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(fc->root->d_sb);
+ struct vboxsf_fs_context *ctx = fc->fs_private;
+ struct inode *iroot = fc->root->d_sb->s_root->d_inode;
+
+ /* Apply changed options to the root inode */
+ sbi->o = ctx->o;
+ vboxsf_init_inode(sbi, iroot, &sbi->root_info);
+
+ return 0;
+}
+
+static void vboxsf_free_fc(struct fs_context *fc)
+{
+ struct vboxsf_fs_context *ctx = fc->fs_private;
+
+ kfree(ctx->nls_name);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations vboxsf_context_ops = {
+ .free = vboxsf_free_fc,
+ .parse_param = vboxsf_parse_param,
+ .parse_monolithic = vboxsf_parse_monolithic,
+ .get_tree = vboxsf_get_tree,
+ .reconfigure = vboxsf_reconfigure,
+};
+
+static int vboxsf_init_fs_context(struct fs_context *fc)
+{
+ struct vboxsf_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ current_uid_gid(&ctx->o.uid, &ctx->o.gid);
+
+ fc->fs_private = ctx;
+ fc->ops = &vboxsf_context_ops;
+ return 0;
+}
+
+static struct file_system_type vboxsf_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "vboxsf",
+ .init_fs_context = vboxsf_init_fs_context,
+ .kill_sb = kill_anon_super
+};
+
+/* Module initialization/finalization handlers */
+static int __init vboxsf_init(void)
+{
+ return register_filesystem(&vboxsf_fs_type);
+}
+
+static void __exit vboxsf_fini(void)
+{
+ unregister_filesystem(&vboxsf_fs_type);
+
+ mutex_lock(&vboxsf_setup_mutex);
+ if (vboxsf_setup_done) {
+ vboxsf_disconnect();
+ /*
+ * Make sure all delayed rcu free inodes are flushed
+ * before we destroy the cache.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(vboxsf_inode_cachep);
+ }
+ mutex_unlock(&vboxsf_setup_mutex);
+}
+
+module_init(vboxsf_init);
+module_exit(vboxsf_fini);
+
+MODULE_DESCRIPTION("Oracle VM VirtualBox Module for Host File System Access");
+MODULE_AUTHOR("Oracle Corporation");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_FS("vboxsf");
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
new file mode 100644
index 000000000000..96bd160da48b
--- /dev/null
+++ b/fs/vboxsf/utils.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: MIT
+/*
+ * VirtualBox Guest Shared Folders support: Utility functions.
+ * Mainly conversion from/to VirtualBox/Linux data structures.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/namei.h>
+#include <linux/nls.h>
+#include <linux/sizes.h>
+#include <linux/vfs.h>
+#include "vfsmod.h"
+
+struct inode *vboxsf_new_inode(struct super_block *sb)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(sb);
+ struct inode *inode;
+ unsigned long flags;
+ int cursor, ret;
+ u32 gen;
+
+ inode = new_inode(sb);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ idr_preload(GFP_KERNEL);
+ spin_lock_irqsave(&sbi->ino_idr_lock, flags);
+ cursor = idr_get_cursor(&sbi->ino_idr);
+ ret = idr_alloc_cyclic(&sbi->ino_idr, inode, 1, 0, GFP_ATOMIC);
+ if (ret >= 0 && ret < cursor)
+ sbi->next_generation++;
+ gen = sbi->next_generation;
+ spin_unlock_irqrestore(&sbi->ino_idr_lock, flags);
+ idr_preload_end();
+
+ if (ret < 0) {
+ iput(inode);
+ return ERR_PTR(ret);
+ }
+
+ inode->i_ino = ret;
+ inode->i_generation = gen;
+ return inode;
+}
+
+/* set [inode] attributes based on [info], uid/gid based on [sbi] */
+void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
+ const struct shfl_fsobjinfo *info)
+{
+ const struct shfl_fsobjattr *attr;
+ s64 allocated;
+ int mode;
+
+ attr = &info->attr;
+
+#define mode_set(r) ((attr->mode & (SHFL_UNIX_##r)) ? (S_##r) : 0)
+
+ mode = mode_set(IRUSR);
+ mode |= mode_set(IWUSR);
+ mode |= mode_set(IXUSR);
+
+ mode |= mode_set(IRGRP);
+ mode |= mode_set(IWGRP);
+ mode |= mode_set(IXGRP);
+
+ mode |= mode_set(IROTH);
+ mode |= mode_set(IWOTH);
+ mode |= mode_set(IXOTH);
+
+#undef mode_set
+
+ /* We use the host-side values for these */
+ inode->i_flags |= S_NOATIME | S_NOCMTIME;
+ inode->i_mapping->a_ops = &vboxsf_reg_aops;
+
+ if (SHFL_IS_DIRECTORY(attr->mode)) {
+ inode->i_mode = sbi->o.dmode_set ? sbi->o.dmode : mode;
+ inode->i_mode &= ~sbi->o.dmask;
+ inode->i_mode |= S_IFDIR;
+ inode->i_op = &vboxsf_dir_iops;
+ inode->i_fop = &vboxsf_dir_fops;
+ /*
+ * XXX: this probably should be set to the number of entries
+ * in the directory plus two (. ..)
+ */
+ set_nlink(inode, 1);
+ } else if (SHFL_IS_SYMLINK(attr->mode)) {
+ inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode;
+ inode->i_mode &= ~sbi->o.fmask;
+ inode->i_mode |= S_IFLNK;
+ inode->i_op = &vboxsf_lnk_iops;
+ set_nlink(inode, 1);
+ } else {
+ inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode;
+ inode->i_mode &= ~sbi->o.fmask;
+ inode->i_mode |= S_IFREG;
+ inode->i_op = &vboxsf_reg_iops;
+ inode->i_fop = &vboxsf_reg_fops;
+ set_nlink(inode, 1);
+ }
+
+ inode->i_uid = sbi->o.uid;
+ inode->i_gid = sbi->o.gid;
+
+ inode->i_size = info->size;
+ inode->i_blkbits = 12;
+ /* i_blocks always in units of 512 bytes! */
+ allocated = info->allocated + 511;
+ do_div(allocated, 512);
+ inode->i_blocks = allocated;
+
+ inode->i_atime = ns_to_timespec64(
+ info->access_time.ns_relative_to_unix_epoch);
+ inode->i_ctime = ns_to_timespec64(
+ info->change_time.ns_relative_to_unix_epoch);
+ inode->i_mtime = ns_to_timespec64(
+ info->modification_time.ns_relative_to_unix_epoch);
+}
+
+int vboxsf_create_at_dentry(struct dentry *dentry,
+ struct shfl_createparms *params)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
+ struct shfl_string *path;
+ int err;
+
+ path = vboxsf_path_from_dentry(sbi, dentry);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ err = vboxsf_create(sbi->root, path, params);
+ __putname(path);
+
+ return err;
+}
+
+int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
+ struct shfl_fsobjinfo *info)
+{
+ struct shfl_createparms params = {};
+ int err;
+
+ params.handle = SHFL_HANDLE_NIL;
+ params.create_flags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW;
+
+ err = vboxsf_create(sbi->root, path, &params);
+ if (err)
+ return err;
+
+ if (params.result != SHFL_FILE_EXISTS)
+ return -ENOENT;
+
+ if (info)
+ *info = params.info;
+
+ return 0;
+}
+
+int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info)
+{
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
+ struct shfl_string *path;
+ int err;
+
+ path = vboxsf_path_from_dentry(sbi, dentry);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ err = vboxsf_stat(sbi, path, info);
+ __putname(path);
+ return err;
+}
+
+int vboxsf_inode_revalidate(struct dentry *dentry)
+{
+ struct vboxsf_sbi *sbi;
+ struct vboxsf_inode *sf_i;
+ struct shfl_fsobjinfo info;
+ struct timespec64 prev_mtime;
+ struct inode *inode;
+ int err;
+
+ if (!dentry || !d_really_is_positive(dentry))
+ return -EINVAL;
+
+ inode = d_inode(dentry);
+ prev_mtime = inode->i_mtime;
+ sf_i = VBOXSF_I(inode);
+ sbi = VBOXSF_SBI(dentry->d_sb);
+ if (!sf_i->force_restat) {
+ if (time_before(jiffies, dentry->d_time + sbi->o.ttl))
+ return 0;
+ }
+
+ err = vboxsf_stat_dentry(dentry, &info);
+ if (err)
+ return err;
+
+ dentry->d_time = jiffies;
+ sf_i->force_restat = 0;
+ vboxsf_init_inode(sbi, inode, &info);
+
+ /*
+ * If the file was changed on the host side we need to invalidate the
+ * page-cache for it. Note this also gets triggered by our own writes,
+ * this is unavoidable.
+ */
+ if (timespec64_compare(&inode->i_mtime, &prev_mtime) > 0)
+ invalidate_inode_pages2(inode->i_mapping);
+
+ return 0;
+}
+
+int vboxsf_getattr(const struct path *path, struct kstat *kstat,
+ u32 request_mask, unsigned int flags)
+{
+ int err;
+ struct dentry *dentry = path->dentry;
+ struct inode *inode = d_inode(dentry);
+ struct vboxsf_inode *sf_i = VBOXSF_I(inode);
+
+ switch (flags & AT_STATX_SYNC_TYPE) {
+ case AT_STATX_DONT_SYNC:
+ err = 0;
+ break;
+ case AT_STATX_FORCE_SYNC:
+ sf_i->force_restat = 1;
+ /* fall-through */
+ default:
+ err = vboxsf_inode_revalidate(dentry);
+ }
+ if (err)
+ return err;
+
+ generic_fillattr(d_inode(dentry), kstat);
+ return 0;
+}
+
+int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry));
+ struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
+ struct shfl_createparms params = {};
+ struct shfl_fsobjinfo info = {};
+ u32 buf_len;
+ int err;
+
+ params.handle = SHFL_HANDLE_NIL;
+ params.create_flags = SHFL_CF_ACT_OPEN_IF_EXISTS |
+ SHFL_CF_ACT_FAIL_IF_NEW |
+ SHFL_CF_ACCESS_ATTR_WRITE;
+
+ /* this is at least required for Posix hosts */
+ if (iattr->ia_valid & ATTR_SIZE)
+ params.create_flags |= SHFL_CF_ACCESS_WRITE;
+
+ err = vboxsf_create_at_dentry(dentry, &params);
+ if (err || params.result != SHFL_FILE_EXISTS)
+ return err ? err : -ENOENT;
+
+#define mode_set(r) ((iattr->ia_mode & (S_##r)) ? SHFL_UNIX_##r : 0)
+
+ /*
+ * Setting the file size and setting the other attributes has to
+ * be handled separately.
+ */
+ if (iattr->ia_valid & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME)) {
+ if (iattr->ia_valid & ATTR_MODE) {
+ info.attr.mode = mode_set(IRUSR);
+ info.attr.mode |= mode_set(IWUSR);
+ info.attr.mode |= mode_set(IXUSR);
+ info.attr.mode |= mode_set(IRGRP);
+ info.attr.mode |= mode_set(IWGRP);
+ info.attr.mode |= mode_set(IXGRP);
+ info.attr.mode |= mode_set(IROTH);
+ info.attr.mode |= mode_set(IWOTH);
+ info.attr.mode |= mode_set(IXOTH);
+
+ if (iattr->ia_mode & S_IFDIR)
+ info.attr.mode |= SHFL_TYPE_DIRECTORY;
+ else
+ info.attr.mode |= SHFL_TYPE_FILE;
+ }
+
+ if (iattr->ia_valid & ATTR_ATIME)
+ info.access_time.ns_relative_to_unix_epoch =
+ timespec64_to_ns(&iattr->ia_atime);
+
+ if (iattr->ia_valid & ATTR_MTIME)
+ info.modification_time.ns_relative_to_unix_epoch =
+ timespec64_to_ns(&iattr->ia_mtime);
+
+ /*
+ * Ignore ctime (inode change time) as it can't be set
+ * from userland anyway.
+ */
+
+ buf_len = sizeof(info);
+ err = vboxsf_fsinfo(sbi->root, params.handle,
+ SHFL_INFO_SET | SHFL_INFO_FILE, &buf_len,
+ &info);
+ if (err) {
+ vboxsf_close(sbi->root, params.handle);
+ return err;
+ }
+
+ /* the host may have given us different attr then requested */
+ sf_i->force_restat = 1;
+ }
+
+#undef mode_set
+
+ if (iattr->ia_valid & ATTR_SIZE) {
+ memset(&info, 0, sizeof(info));
+ info.size = iattr->ia_size;
+ buf_len = sizeof(info);
+ err = vboxsf_fsinfo(sbi->root, params.handle,
+ SHFL_INFO_SET | SHFL_INFO_SIZE, &buf_len,
+ &info);
+ if (err) {
+ vboxsf_close(sbi->root, params.handle);
+ return err;
+ }
+
+ /* the host may have given us different attr then requested */
+ sf_i->force_restat = 1;
+ }
+
+ vboxsf_close(sbi->root, params.handle);
+
+ /* Update the inode with what the host has actually given us. */
+ if (sf_i->force_restat)
+ vboxsf_inode_revalidate(dentry);
+
+ return 0;
+}
+
+/*
+ * [dentry] contains string encoded in coding system that corresponds
+ * to [sbi]->nls, we must convert it to UTF8 here.
+ * Returns a shfl_string allocated through __getname (must be freed using
+ * __putname), or an ERR_PTR on error.
+ */
+struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
+ struct dentry *dentry)
+{
+ struct shfl_string *shfl_path;
+ int path_len, out_len, nb;
+ char *buf, *path;
+ wchar_t uni;
+ u8 *out;
+
+ buf = __getname();
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ path = dentry_path_raw(dentry, buf, PATH_MAX);
+ if (IS_ERR(path)) {
+ __putname(buf);
+ return ERR_CAST(path);
+ }
+ path_len = strlen(path);
+
+ if (sbi->nls) {
+ shfl_path = __getname();
+ if (!shfl_path) {
+ __putname(buf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ out = shfl_path->string.utf8;
+ out_len = PATH_MAX - SHFLSTRING_HEADER_SIZE - 1;
+
+ while (path_len) {
+ nb = sbi->nls->char2uni(path, path_len, &uni);
+ if (nb < 0) {
+ __putname(shfl_path);
+ __putname(buf);
+ return ERR_PTR(-EINVAL);
+ }
+ path += nb;
+ path_len -= nb;
+
+ nb = utf32_to_utf8(uni, out, out_len);
+ if (nb < 0) {
+ __putname(shfl_path);
+ __putname(buf);
+ return ERR_PTR(-ENAMETOOLONG);
+ }
+ out += nb;
+ out_len -= nb;
+ }
+ *out = 0;
+ shfl_path->length = out - shfl_path->string.utf8;
+ shfl_path->size = shfl_path->length + 1;
+ __putname(buf);
+ } else {
+ if ((SHFLSTRING_HEADER_SIZE + path_len + 1) > PATH_MAX) {
+ __putname(buf);
+ return ERR_PTR(-ENAMETOOLONG);
+ }
+ /*
+ * dentry_path stores the name at the end of buf, but the
+ * shfl_string string we return must be properly aligned.
+ */
+ shfl_path = (struct shfl_string *)buf;
+ memmove(shfl_path->string.utf8, path, path_len);
+ shfl_path->string.utf8[path_len] = 0;
+ shfl_path->length = path_len;
+ shfl_path->size = path_len + 1;
+ }
+
+ return shfl_path;
+}
+
+int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
+ const unsigned char *utf8_name, size_t utf8_len)
+{
+ const char *in;
+ char *out;
+ size_t out_len;
+ size_t out_bound_len;
+ size_t in_bound_len;
+
+ in = utf8_name;
+ in_bound_len = utf8_len;
+
+ out = name;
+ out_len = 0;
+ /* Reserve space for terminating 0 */
+ out_bound_len = name_bound_len - 1;
+
+ while (in_bound_len) {
+ int nb;
+ unicode_t uni;
+
+ nb = utf8_to_utf32(in, in_bound_len, &uni);
+ if (nb < 0)
+ return -EINVAL;
+
+ in += nb;
+ in_bound_len -= nb;
+
+ nb = sbi->nls->uni2char(uni, out, out_bound_len);
+ if (nb < 0)
+ return nb;
+
+ out += nb;
+ out_bound_len -= nb;
+ out_len += nb;
+ }
+
+ *out = 0;
+
+ return 0;
+}
+
+static struct vboxsf_dir_buf *vboxsf_dir_buf_alloc(struct list_head *list)
+{
+ struct vboxsf_dir_buf *b;
+
+ b = kmalloc(sizeof(*b), GFP_KERNEL);
+ if (!b)
+ return NULL;
+
+ b->buf = kmalloc(DIR_BUFFER_SIZE, GFP_KERNEL);
+ if (!b->buf) {
+ kfree(b);
+ return NULL;
+ }
+
+ b->entries = 0;
+ b->used = 0;
+ b->free = DIR_BUFFER_SIZE;
+ list_add(&b->head, list);
+
+ return b;
+}
+
+static void vboxsf_dir_buf_free(struct vboxsf_dir_buf *b)
+{
+ list_del(&b->head);
+ kfree(b->buf);
+ kfree(b);
+}
+
+struct vboxsf_dir_info *vboxsf_dir_info_alloc(void)
+{
+ struct vboxsf_dir_info *p;
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return NULL;
+
+ INIT_LIST_HEAD(&p->info_list);
+ return p;
+}
+
+void vboxsf_dir_info_free(struct vboxsf_dir_info *p)
+{
+ struct list_head *list, *pos, *tmp;
+
+ list = &p->info_list;
+ list_for_each_safe(pos, tmp, list) {
+ struct vboxsf_dir_buf *b;
+
+ b = list_entry(pos, struct vboxsf_dir_buf, head);
+ vboxsf_dir_buf_free(b);
+ }
+ kfree(p);
+}
+
+int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
+ u64 handle)
+{
+ struct vboxsf_dir_buf *b;
+ u32 entries, size;
+ int err = 0;
+ void *buf;
+
+ /* vboxsf_dirinfo returns 1 on end of dir */
+ while (err == 0) {
+ b = vboxsf_dir_buf_alloc(&sf_d->info_list);
+ if (!b) {
+ err = -ENOMEM;
+ break;
+ }
+
+ buf = b->buf;
+ size = b->free;
+
+ err = vboxsf_dirinfo(sbi->root, handle, NULL, 0, 0,
+ &size, buf, &entries);
+ if (err < 0)
+ break;
+
+ b->entries += entries;
+ b->free -= size;
+ b->used += size;
+ }
+
+ if (b && b->used == 0)
+ vboxsf_dir_buf_free(b);
+
+ /* -EILSEQ means the host could not translate a filename, ignore */
+ if (err > 0 || err == -EILSEQ)
+ err = 0;
+
+ return err;
+}
diff --git a/fs/vboxsf/vboxsf_wrappers.c b/fs/vboxsf/vboxsf_wrappers.c
new file mode 100644
index 000000000000..bfc78a097dae
--- /dev/null
+++ b/fs/vboxsf/vboxsf_wrappers.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Wrapper functions for the shfl host calls.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vbox_err.h>
+#include <linux/vbox_utils.h>
+#include "vfsmod.h"
+
+#define SHFL_REQUEST \
+ (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV_OTHER | \
+ VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN)
+
+static u32 vboxsf_client_id;
+
+int vboxsf_connect(void)
+{
+ struct vbg_dev *gdev;
+ struct vmmdev_hgcm_service_location loc;
+ int err, vbox_status;
+
+ loc.type = VMMDEV_HGCM_LOC_LOCALHOST_EXISTING;
+ strcpy(loc.u.localhost.service_name, "VBoxSharedFolders");
+
+ gdev = vbg_get_gdev();
+ if (IS_ERR(gdev))
+ return -ENODEV; /* No guest-device */
+
+ err = vbg_hgcm_connect(gdev, SHFL_REQUEST, &loc,
+ &vboxsf_client_id, &vbox_status);
+ vbg_put_gdev(gdev);
+
+ return err ? err : vbg_status_code_to_errno(vbox_status);
+}
+
+void vboxsf_disconnect(void)
+{
+ struct vbg_dev *gdev;
+ int vbox_status;
+
+ gdev = vbg_get_gdev();
+ if (IS_ERR(gdev))
+ return; /* guest-device is gone, already disconnected */
+
+ vbg_hgcm_disconnect(gdev, SHFL_REQUEST, vboxsf_client_id, &vbox_status);
+ vbg_put_gdev(gdev);
+}
+
+static int vboxsf_call(u32 function, void *parms, u32 parm_count, int *status)
+{
+ struct vbg_dev *gdev;
+ int err, vbox_status;
+
+ gdev = vbg_get_gdev();
+ if (IS_ERR(gdev))
+ return -ESHUTDOWN; /* guest-dev removed underneath us */
+
+ err = vbg_hgcm_call(gdev, SHFL_REQUEST, vboxsf_client_id, function,
+ U32_MAX, parms, parm_count, &vbox_status);
+ vbg_put_gdev(gdev);
+
+ if (err < 0)
+ return err;
+
+ if (status)
+ *status = vbox_status;
+
+ return vbg_status_code_to_errno(vbox_status);
+}
+
+int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root)
+{
+ struct shfl_map_folder parms;
+ int err, status;
+
+ parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
+ parms.path.u.pointer.size = shfl_string_buf_size(folder_name);
+ parms.path.u.pointer.u.linear_addr = (uintptr_t)folder_name;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = 0;
+
+ parms.delimiter.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.delimiter.u.value32 = '/';
+
+ parms.case_sensitive.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.case_sensitive.u.value32 = 1;
+
+ err = vboxsf_call(SHFL_FN_MAP_FOLDER, &parms, SHFL_CPARMS_MAP_FOLDER,
+ &status);
+ if (err == -ENOSYS && status == VERR_NOT_IMPLEMENTED)
+ vbg_err("%s: Error host is too old\n", __func__);
+
+ *root = parms.root.u.value32;
+ return err;
+}
+
+int vboxsf_unmap_folder(u32 root)
+{
+ struct shfl_unmap_folder parms;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ return vboxsf_call(SHFL_FN_UNMAP_FOLDER, &parms,
+ SHFL_CPARMS_UNMAP_FOLDER, NULL);
+}
+
+/**
+ * vboxsf_create - Create a new file or folder
+ * @root: Root of the shared folder in which to create the file
+ * @parsed_path: The path of the file or folder relative to the shared folder
+ * @param: create_parms Parameters for file/folder creation.
+ *
+ * Create a new file or folder or open an existing one in a shared folder.
+ * Note this function always returns 0 / success unless an exceptional condition
+ * occurs - out of memory, invalid arguments, etc. If the file or folder could
+ * not be opened or created, create_parms->handle will be set to
+ * SHFL_HANDLE_NIL on return. In this case the value in create_parms->result
+ * provides information as to why (e.g. SHFL_FILE_EXISTS), create_parms->result
+ * is also set on success as additional information.
+ *
+ * Returns:
+ * 0 or negative errno value.
+ */
+int vboxsf_create(u32 root, struct shfl_string *parsed_path,
+ struct shfl_createparms *create_parms)
+{
+ struct shfl_create parms;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
+ parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
+ parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
+
+ parms.parms.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
+ parms.parms.u.pointer.size = sizeof(struct shfl_createparms);
+ parms.parms.u.pointer.u.linear_addr = (uintptr_t)create_parms;
+
+ return vboxsf_call(SHFL_FN_CREATE, &parms, SHFL_CPARMS_CREATE, NULL);
+}
+
+int vboxsf_close(u32 root, u64 handle)
+{
+ struct shfl_close parms;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+ parms.handle.u.value64 = handle;
+
+ return vboxsf_call(SHFL_FN_CLOSE, &parms, SHFL_CPARMS_CLOSE, NULL);
+}
+
+int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags)
+{
+ struct shfl_remove parms;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+ parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
+ parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
+
+ parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.flags.u.value32 = flags;
+
+ return vboxsf_call(SHFL_FN_REMOVE, &parms, SHFL_CPARMS_REMOVE, NULL);
+}
+
+int vboxsf_rename(u32 root, struct shfl_string *src_path,
+ struct shfl_string *dest_path, u32 flags)
+{
+ struct shfl_rename parms;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.src.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+ parms.src.u.pointer.size = shfl_string_buf_size(src_path);
+ parms.src.u.pointer.u.linear_addr = (uintptr_t)src_path;
+
+ parms.dest.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+ parms.dest.u.pointer.size = shfl_string_buf_size(dest_path);
+ parms.dest.u.pointer.u.linear_addr = (uintptr_t)dest_path;
+
+ parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.flags.u.value32 = flags;
+
+ return vboxsf_call(SHFL_FN_RENAME, &parms, SHFL_CPARMS_RENAME, NULL);
+}
+
+int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf)
+{
+ struct shfl_read parms;
+ int err;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+ parms.handle.u.value64 = handle;
+ parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+ parms.offset.u.value64 = offset;
+ parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.cb.u.value32 = *buf_len;
+ parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
+ parms.buffer.u.pointer.size = *buf_len;
+ parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+ err = vboxsf_call(SHFL_FN_READ, &parms, SHFL_CPARMS_READ, NULL);
+
+ *buf_len = parms.cb.u.value32;
+ return err;
+}
+
+int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf)
+{
+ struct shfl_write parms;
+ int err;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+ parms.handle.u.value64 = handle;
+ parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+ parms.offset.u.value64 = offset;
+ parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.cb.u.value32 = *buf_len;
+ parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+ parms.buffer.u.pointer.size = *buf_len;
+ parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+ err = vboxsf_call(SHFL_FN_WRITE, &parms, SHFL_CPARMS_WRITE, NULL);
+
+ *buf_len = parms.cb.u.value32;
+ return err;
+}
+
+/* Returns 0 on success, 1 on end-of-dir, negative errno otherwise */
+int vboxsf_dirinfo(u32 root, u64 handle,
+ struct shfl_string *parsed_path, u32 flags, u32 index,
+ u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count)
+{
+ struct shfl_list parms;
+ int err, status;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+ parms.handle.u.value64 = handle;
+ parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.flags.u.value32 = flags;
+ parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.cb.u.value32 = *buf_len;
+ if (parsed_path) {
+ parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+ parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
+ parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
+ } else {
+ parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_IN;
+ parms.path.u.pointer.size = 0;
+ parms.path.u.pointer.u.linear_addr = 0;
+ }
+
+ parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
+ parms.buffer.u.pointer.size = *buf_len;
+ parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+ parms.resume_point.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.resume_point.u.value32 = index;
+ parms.file_count.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.file_count.u.value32 = 0; /* out parameter only */
+
+ err = vboxsf_call(SHFL_FN_LIST, &parms, SHFL_CPARMS_LIST, &status);
+ if (err == -ENODATA && status == VERR_NO_MORE_FILES)
+ err = 1;
+
+ *buf_len = parms.cb.u.value32;
+ *file_count = parms.file_count.u.value32;
+ return err;
+}
+
+int vboxsf_fsinfo(u32 root, u64 handle, u32 flags,
+ u32 *buf_len, void *buf)
+{
+ struct shfl_information parms;
+ int err;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT;
+ parms.handle.u.value64 = handle;
+ parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.flags.u.value32 = flags;
+ parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.cb.u.value32 = *buf_len;
+ parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL;
+ parms.info.u.pointer.size = *buf_len;
+ parms.info.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+ err = vboxsf_call(SHFL_FN_INFORMATION, &parms, SHFL_CPARMS_INFORMATION,
+ NULL);
+
+ *buf_len = parms.cb.u.value32;
+ return err;
+}
+
+int vboxsf_readlink(u32 root, struct shfl_string *parsed_path,
+ u32 buf_len, u8 *buf)
+{
+ struct shfl_readLink parms;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+ parms.path.u.pointer.size = shfl_string_buf_size(parsed_path);
+ parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path;
+
+ parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
+ parms.buffer.u.pointer.size = buf_len;
+ parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+ return vboxsf_call(SHFL_FN_READLINK, &parms, SHFL_CPARMS_READLINK,
+ NULL);
+}
+
+int vboxsf_symlink(u32 root, struct shfl_string *new_path,
+ struct shfl_string *old_path, struct shfl_fsobjinfo *buf)
+{
+ struct shfl_symlink parms;
+
+ parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT;
+ parms.root.u.value32 = root;
+
+ parms.new_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+ parms.new_path.u.pointer.size = shfl_string_buf_size(new_path);
+ parms.new_path.u.pointer.u.linear_addr = (uintptr_t)new_path;
+
+ parms.old_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN;
+ parms.old_path.u.pointer.size = shfl_string_buf_size(old_path);
+ parms.old_path.u.pointer.u.linear_addr = (uintptr_t)old_path;
+
+ parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT;
+ parms.info.u.pointer.size = sizeof(struct shfl_fsobjinfo);
+ parms.info.u.pointer.u.linear_addr = (uintptr_t)buf;
+
+ return vboxsf_call(SHFL_FN_SYMLINK, &parms, SHFL_CPARMS_SYMLINK, NULL);
+}
+
+int vboxsf_set_utf8(void)
+{
+ return vboxsf_call(SHFL_FN_SET_UTF8, NULL, 0, NULL);
+}
+
+int vboxsf_set_symlinks(void)
+{
+ return vboxsf_call(SHFL_FN_SET_SYMLINKS, NULL, 0, NULL);
+}
diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h
new file mode 100644
index 000000000000..18f95b00fc33
--- /dev/null
+++ b/fs/vboxsf/vfsmod.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * VirtualBox Guest Shared Folders support: module header.
+ *
+ * Copyright (C) 2006-2018 Oracle Corporation
+ */
+
+#ifndef VFSMOD_H
+#define VFSMOD_H
+
+#include <linux/backing-dev.h>
+#include <linux/idr.h>
+#include "shfl_hostintf.h"
+
+#define DIR_BUFFER_SIZE SZ_16K
+
+/* The cast is to prevent assignment of void * to pointers of arbitrary type */
+#define VBOXSF_SBI(sb) ((struct vboxsf_sbi *)(sb)->s_fs_info)
+#define VBOXSF_I(i) container_of(i, struct vboxsf_inode, vfs_inode)
+
+struct vboxsf_options {
+ unsigned long ttl;
+ kuid_t uid;
+ kgid_t gid;
+ bool dmode_set;
+ bool fmode_set;
+ umode_t dmode;
+ umode_t fmode;
+ umode_t dmask;
+ umode_t fmask;
+};
+
+struct vboxsf_fs_context {
+ struct vboxsf_options o;
+ char *nls_name;
+};
+
+/* per-shared folder information */
+struct vboxsf_sbi {
+ struct vboxsf_options o;
+ struct shfl_fsobjinfo root_info;
+ struct idr ino_idr;
+ spinlock_t ino_idr_lock; /* This protects ino_idr */
+ struct nls_table *nls;
+ u32 next_generation;
+ u32 root;
+ int bdi_id;
+};
+
+/* per-inode information */
+struct vboxsf_inode {
+ /* some information was changed, update data on next revalidate */
+ int force_restat;
+ /* list of open handles for this inode + lock protecting it */
+ struct list_head handle_list;
+ /* This mutex protects handle_list accesses */
+ struct mutex handle_list_mutex;
+ /* The VFS inode struct */
+ struct inode vfs_inode;
+};
+
+struct vboxsf_dir_info {
+ struct list_head info_list;
+};
+
+struct vboxsf_dir_buf {
+ size_t entries;
+ size_t free;
+ size_t used;
+ void *buf;
+ struct list_head head;
+};
+
+/* globals */
+extern const struct inode_operations vboxsf_dir_iops;
+extern const struct inode_operations vboxsf_lnk_iops;
+extern const struct inode_operations vboxsf_reg_iops;
+extern const struct file_operations vboxsf_dir_fops;
+extern const struct file_operations vboxsf_reg_fops;
+extern const struct address_space_operations vboxsf_reg_aops;
+extern const struct dentry_operations vboxsf_dentry_ops;
+
+/* from utils.c */
+struct inode *vboxsf_new_inode(struct super_block *sb);
+void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
+ const struct shfl_fsobjinfo *info);
+int vboxsf_create_at_dentry(struct dentry *dentry,
+ struct shfl_createparms *params);
+int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
+ struct shfl_fsobjinfo *info);
+int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info);
+int vboxsf_inode_revalidate(struct dentry *dentry);
+int vboxsf_getattr(const struct path *path, struct kstat *kstat,
+ u32 request_mask, unsigned int query_flags);
+int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr);
+struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
+ struct dentry *dentry);
+int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
+ const unsigned char *utf8_name, size_t utf8_len);
+struct vboxsf_dir_info *vboxsf_dir_info_alloc(void);
+void vboxsf_dir_info_free(struct vboxsf_dir_info *p);
+int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d,
+ u64 handle);
+
+/* from vboxsf_wrappers.c */
+int vboxsf_connect(void);
+void vboxsf_disconnect(void);
+
+int vboxsf_create(u32 root, struct shfl_string *parsed_path,
+ struct shfl_createparms *create_parms);
+
+int vboxsf_close(u32 root, u64 handle);
+int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags);
+int vboxsf_rename(u32 root, struct shfl_string *src_path,
+ struct shfl_string *dest_path, u32 flags);
+
+int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf);
+int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf);
+
+int vboxsf_dirinfo(u32 root, u64 handle,
+ struct shfl_string *parsed_path, u32 flags, u32 index,
+ u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count);
+int vboxsf_fsinfo(u32 root, u64 handle, u32 flags,
+ u32 *buf_len, void *buf);
+
+int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root);
+int vboxsf_unmap_folder(u32 root);
+
+int vboxsf_readlink(u32 root, struct shfl_string *parsed_path,
+ u32 buf_len, u8 *buf);
+int vboxsf_symlink(u32 root, struct shfl_string *new_path,
+ struct shfl_string *old_path, struct shfl_fsobjinfo *buf);
+
+int vboxsf_set_utf8(void);
+int vboxsf_set_symlinks(void);
+
+#endif
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a688eb5c5ae..58e937be24ce 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -587,7 +587,7 @@ xfs_dax_writepages(
xfs_iflags_clear(ip, XFS_ITRUNCATED);
return dax_writeback_mapping_range(mapping,
- xfs_inode_buftarg(ip)->bt_bdev, wbc);
+ xfs_inode_buftarg(ip)->bt_daxdev, wbc);
}
STATIC sector_t
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 760901783944..2094386af8ac 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -62,7 +62,7 @@ enum {
Opt_discard, Opt_nodiscard, Opt_dax,
};
-static const struct fs_parameter_spec xfs_param_specs[] = {
+static const struct fs_parameter_spec xfs_fs_parameters[] = {
fsparam_u32("logbufs", Opt_logbufs),
fsparam_string("logbsize", Opt_logbsize),
fsparam_string("logdev", Opt_logdev),
@@ -106,11 +106,6 @@ static const struct fs_parameter_spec xfs_param_specs[] = {
{}
};
-static const struct fs_parameter_description xfs_fs_parameters = {
- .name = "xfs",
- .specs = xfs_param_specs,
-};
-
struct proc_xfs_info {
uint64_t flag;
char *str;
@@ -1120,7 +1115,7 @@ xfs_fc_parse_param(
int size = 0;
int opt;
- opt = fs_parse(fc, &xfs_fs_parameters, param, &result);
+ opt = fs_parse(fc, xfs_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -1782,7 +1777,7 @@ static struct file_system_type xfs_fs_type = {
.owner = THIS_MODULE,
.name = "xfs",
.init_fs_context = xfs_init_fs_context,
- .parameters = &xfs_fs_parameters,
+ .parameters = xfs_fs_parameters,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig
new file mode 100644
index 000000000000..fb87ad372e29
--- /dev/null
+++ b/fs/zonefs/Kconfig
@@ -0,0 +1,9 @@
+config ZONEFS_FS
+ tristate "zonefs filesystem support"
+ depends on BLOCK
+ depends on BLK_DEV_ZONED
+ help
+ zonefs is a simple file system which exposes zones of a zoned block
+ device (e.g. host-managed or host-aware SMR disk drives) as files.
+
+ If unsure, say N.
diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile
new file mode 100644
index 000000000000..75a380aa1ae1
--- /dev/null
+++ b/fs/zonefs/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ZONEFS_FS) += zonefs.o
+
+zonefs-y := super.o
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
new file mode 100644
index 000000000000..8bc6ef82d693
--- /dev/null
+++ b/fs/zonefs/super.c
@@ -0,0 +1,1439 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple file system for zoned block devices exposing zones as files.
+ *
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/magic.h>
+#include <linux/iomap.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/statfs.h>
+#include <linux/writeback.h>
+#include <linux/quotaops.h>
+#include <linux/seq_file.h>
+#include <linux/parser.h>
+#include <linux/uio.h>
+#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/crc32.h>
+
+#include "zonefs.h"
+
+static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ loff_t isize;
+
+ /* All I/Os should always be within the file maximum size */
+ if (WARN_ON_ONCE(offset + length > zi->i_max_size))
+ return -EIO;
+
+ /*
+ * Sequential zones can only accept direct writes. This is already
+ * checked when writes are issued, so warn if we see a page writeback
+ * operation.
+ */
+ if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
+ (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)))
+ return -EIO;
+
+ /*
+ * For conventional zones, all blocks are always mapped. For sequential
+ * zones, all blocks after always mapped below the inode size (zone
+ * write pointer) and unwriten beyond.
+ */
+ mutex_lock(&zi->i_truncate_mutex);
+ isize = i_size_read(inode);
+ if (offset >= isize)
+ iomap->type = IOMAP_UNWRITTEN;
+ else
+ iomap->type = IOMAP_MAPPED;
+ if (flags & IOMAP_WRITE)
+ length = zi->i_max_size - offset;
+ else
+ length = min(length, isize - offset);
+ mutex_unlock(&zi->i_truncate_mutex);
+
+ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
+ iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset;
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+
+ return 0;
+}
+
+static const struct iomap_ops zonefs_iomap_ops = {
+ .iomap_begin = zonefs_iomap_begin,
+};
+
+static int zonefs_readpage(struct file *unused, struct page *page)
+{
+ return iomap_readpage(page, &zonefs_iomap_ops);
+}
+
+static int zonefs_readpages(struct file *unused, struct address_space *mapping,
+ struct list_head *pages, unsigned int nr_pages)
+{
+ return iomap_readpages(mapping, pages, nr_pages, &zonefs_iomap_ops);
+}
+
+/*
+ * Map blocks for page writeback. This is used only on conventional zone files,
+ * which implies that the page range can only be within the fixed inode size.
+ */
+static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc,
+ struct inode *inode, loff_t offset)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+ if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV))
+ return -EIO;
+ if (WARN_ON_ONCE(offset >= i_size_read(inode)))
+ return -EIO;
+
+ /* If the mapping is already OK, nothing needs to be done */
+ if (offset >= wpc->iomap.offset &&
+ offset < wpc->iomap.offset + wpc->iomap.length)
+ return 0;
+
+ return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset,
+ IOMAP_WRITE, &wpc->iomap, NULL);
+}
+
+static const struct iomap_writeback_ops zonefs_writeback_ops = {
+ .map_blocks = zonefs_map_blocks,
+};
+
+static int zonefs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct iomap_writepage_ctx wpc = { };
+
+ return iomap_writepage(page, wbc, &wpc, &zonefs_writeback_ops);
+}
+
+static int zonefs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct iomap_writepage_ctx wpc = { };
+
+ return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops);
+}
+
+static const struct address_space_operations zonefs_file_aops = {
+ .readpage = zonefs_readpage,
+ .readpages = zonefs_readpages,
+ .writepage = zonefs_writepage,
+ .writepages = zonefs_writepages,
+ .set_page_dirty = iomap_set_page_dirty,
+ .releasepage = iomap_releasepage,
+ .invalidatepage = iomap_invalidatepage,
+ .migratepage = iomap_migrate_page,
+ .is_partially_uptodate = iomap_is_partially_uptodate,
+ .error_remove_page = generic_error_remove_page,
+ .direct_IO = noop_direct_IO,
+};
+
+static void zonefs_update_stats(struct inode *inode, loff_t new_isize)
+{
+ struct super_block *sb = inode->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ loff_t old_isize = i_size_read(inode);
+ loff_t nr_blocks;
+
+ if (new_isize == old_isize)
+ return;
+
+ spin_lock(&sbi->s_lock);
+
+ /*
+ * This may be called for an update after an IO error.
+ * So beware of the values seen.
+ */
+ if (new_isize < old_isize) {
+ nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits;
+ if (sbi->s_used_blocks > nr_blocks)
+ sbi->s_used_blocks -= nr_blocks;
+ else
+ sbi->s_used_blocks = 0;
+ } else {
+ sbi->s_used_blocks +=
+ (new_isize - old_isize) >> sb->s_blocksize_bits;
+ if (sbi->s_used_blocks > sbi->s_blocks)
+ sbi->s_used_blocks = sbi->s_blocks;
+ }
+
+ spin_unlock(&sbi->s_lock);
+}
+
+/*
+ * Check a zone condition and adjust its file inode access permissions for
+ * offline and readonly zones. Return the inode size corresponding to the
+ * amount of readable data in the zone.
+ */
+static loff_t zonefs_check_zone_condition(struct inode *inode,
+ struct blk_zone *zone, bool warn)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+ switch (zone->cond) {
+ case BLK_ZONE_COND_OFFLINE:
+ /*
+ * Dead zone: make the inode immutable, disable all accesses
+ * and set the file size to 0 (zone wp set to zone start).
+ */
+ if (warn)
+ zonefs_warn(inode->i_sb, "inode %lu: offline zone\n",
+ inode->i_ino);
+ inode->i_flags |= S_IMMUTABLE;
+ inode->i_mode &= ~0777;
+ zone->wp = zone->start;
+ return 0;
+ case BLK_ZONE_COND_READONLY:
+ /* Do not allow writes in read-only zones */
+ if (warn)
+ zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n",
+ inode->i_ino);
+ inode->i_flags |= S_IMMUTABLE;
+ inode->i_mode &= ~0222;
+ /* fallthrough */
+ default:
+ if (zi->i_ztype == ZONEFS_ZTYPE_CNV)
+ return zi->i_max_size;
+ return (zone->wp - zone->start) << SECTOR_SHIFT;
+ }
+}
+
+struct zonefs_ioerr_data {
+ struct inode *inode;
+ bool write;
+};
+
+static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ struct zonefs_ioerr_data *err = data;
+ struct inode *inode = err->inode;
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ loff_t isize, data_size;
+
+ /*
+ * Check the zone condition: if the zone is not "bad" (offline or
+ * read-only), read errors are simply signaled to the IO issuer as long
+ * as there is no inconsistency between the inode size and the amount of
+ * data writen in the zone (data_size).
+ */
+ data_size = zonefs_check_zone_condition(inode, zone, true);
+ isize = i_size_read(inode);
+ if (zone->cond != BLK_ZONE_COND_OFFLINE &&
+ zone->cond != BLK_ZONE_COND_READONLY &&
+ !err->write && isize == data_size)
+ return 0;
+
+ /*
+ * At this point, we detected either a bad zone or an inconsistency
+ * between the inode size and the amount of data written in the zone.
+ * For the latter case, the cause may be a write IO error or an external
+ * action on the device. Two error patterns exist:
+ * 1) The inode size is lower than the amount of data in the zone:
+ * a write operation partially failed and data was writen at the end
+ * of the file. This can happen in the case of a large direct IO
+ * needing several BIOs and/or write requests to be processed.
+ * 2) The inode size is larger than the amount of data in the zone:
+ * this can happen with a deferred write error with the use of the
+ * device side write cache after getting successful write IO
+ * completions. Other possibilities are (a) an external corruption,
+ * e.g. an application reset the zone directly, or (b) the device
+ * has a serious problem (e.g. firmware bug).
+ *
+ * In all cases, warn about inode size inconsistency and handle the
+ * IO error according to the zone condition and to the mount options.
+ */
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size)
+ zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n",
+ inode->i_ino, isize, data_size);
+
+ /*
+ * First handle bad zones signaled by hardware. The mount options
+ * errors=zone-ro and errors=zone-offline result in changing the
+ * zone condition to read-only and offline respectively, as if the
+ * condition was signaled by the hardware.
+ */
+ if (zone->cond == BLK_ZONE_COND_OFFLINE ||
+ sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) {
+ zonefs_warn(sb, "inode %lu: read/write access disabled\n",
+ inode->i_ino);
+ if (zone->cond != BLK_ZONE_COND_OFFLINE) {
+ zone->cond = BLK_ZONE_COND_OFFLINE;
+ data_size = zonefs_check_zone_condition(inode, zone,
+ false);
+ }
+ } else if (zone->cond == BLK_ZONE_COND_READONLY ||
+ sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) {
+ zonefs_warn(sb, "inode %lu: write access disabled\n",
+ inode->i_ino);
+ if (zone->cond != BLK_ZONE_COND_READONLY) {
+ zone->cond = BLK_ZONE_COND_READONLY;
+ data_size = zonefs_check_zone_condition(inode, zone,
+ false);
+ }
+ }
+
+ /*
+ * If error=remount-ro was specified, any error result in remounting
+ * the volume as read-only.
+ */
+ if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) {
+ zonefs_warn(sb, "remounting filesystem read-only\n");
+ sb->s_flags |= SB_RDONLY;
+ }
+
+ /*
+ * Update block usage stats and the inode size to prevent access to
+ * invalid data.
+ */
+ zonefs_update_stats(inode, data_size);
+ i_size_write(inode, data_size);
+ zi->i_wpoffset = data_size;
+
+ return 0;
+}
+
+/*
+ * When an file IO error occurs, check the file zone to see if there is a change
+ * in the zone condition (e.g. offline or read-only). For a failed write to a
+ * sequential zone, the zone write pointer position must also be checked to
+ * eventually correct the file size and zonefs inode write pointer offset
+ * (which can be out of sync with the drive due to partial write failures).
+ */
+static void zonefs_io_error(struct inode *inode, bool write)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ unsigned int noio_flag;
+ unsigned int nr_zones =
+ zi->i_max_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+ struct zonefs_ioerr_data err = {
+ .inode = inode,
+ .write = write,
+ };
+ int ret;
+
+ mutex_lock(&zi->i_truncate_mutex);
+
+ /*
+ * Memory allocations in blkdev_report_zones() can trigger a memory
+ * reclaim which may in turn cause a recursion into zonefs as well as
+ * struct request allocations for the same device. The former case may
+ * end up in a deadlock on the inode truncate mutex, while the latter
+ * may prevent IO forward progress. Executing the report zones under
+ * the GFP_NOIO context avoids both problems.
+ */
+ noio_flag = memalloc_noio_save();
+ ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones,
+ zonefs_io_error_cb, &err);
+ if (ret != nr_zones)
+ zonefs_err(sb, "Get inode %lu zone information failed %d\n",
+ inode->i_ino, ret);
+ memalloc_noio_restore(noio_flag);
+
+ mutex_unlock(&zi->i_truncate_mutex);
+}
+
+static int zonefs_file_truncate(struct inode *inode, loff_t isize)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ loff_t old_isize;
+ enum req_opf op;
+ int ret = 0;
+
+ /*
+ * Only sequential zone files can be truncated and truncation is allowed
+ * only down to a 0 size, which is equivalent to a zone reset, and to
+ * the maximum file size, which is equivalent to a zone finish.
+ */
+ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+ return -EPERM;
+
+ if (!isize)
+ op = REQ_OP_ZONE_RESET;
+ else if (isize == zi->i_max_size)
+ op = REQ_OP_ZONE_FINISH;
+ else
+ return -EPERM;
+
+ inode_dio_wait(inode);
+
+ /* Serialize against page faults */
+ down_write(&zi->i_mmap_sem);
+
+ /* Serialize against zonefs_iomap_begin() */
+ mutex_lock(&zi->i_truncate_mutex);
+
+ old_isize = i_size_read(inode);
+ if (isize == old_isize)
+ goto unlock;
+
+ ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector,
+ zi->i_max_size >> SECTOR_SHIFT, GFP_NOFS);
+ if (ret) {
+ zonefs_err(inode->i_sb,
+ "Zone management operation at %llu failed %d",
+ zi->i_zsector, ret);
+ goto unlock;
+ }
+
+ zonefs_update_stats(inode, isize);
+ truncate_setsize(inode, isize);
+ zi->i_wpoffset = isize;
+
+unlock:
+ mutex_unlock(&zi->i_truncate_mutex);
+ up_write(&zi->i_mmap_sem);
+
+ return ret;
+}
+
+static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ struct inode *inode = d_inode(dentry);
+ int ret;
+
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
+ ret = setattr_prepare(dentry, iattr);
+ if (ret)
+ return ret;
+
+ /*
+ * Since files and directories cannot be created nor deleted, do not
+ * allow setting any write attributes on the sub-directories grouping
+ * files by zone type.
+ */
+ if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) &&
+ (iattr->ia_mode & 0222))
+ return -EPERM;
+
+ if (((iattr->ia_valid & ATTR_UID) &&
+ !uid_eq(iattr->ia_uid, inode->i_uid)) ||
+ ((iattr->ia_valid & ATTR_GID) &&
+ !gid_eq(iattr->ia_gid, inode->i_gid))) {
+ ret = dquot_transfer(inode, iattr);
+ if (ret)
+ return ret;
+ }
+
+ if (iattr->ia_valid & ATTR_SIZE) {
+ ret = zonefs_file_truncate(inode, iattr->ia_size);
+ if (ret)
+ return ret;
+ }
+
+ setattr_copy(inode, iattr);
+
+ return 0;
+}
+
+static const struct inode_operations zonefs_file_inode_operations = {
+ .setattr = zonefs_inode_setattr,
+};
+
+static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
+{
+ struct inode *inode = file_inode(file);
+ int ret = 0;
+
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
+ /*
+ * Since only direct writes are allowed in sequential files, page cache
+ * flush is needed only for conventional zone files.
+ */
+ if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
+ ret = file_write_and_wait_range(file, start, end);
+ if (!ret)
+ ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+
+ if (ret)
+ zonefs_io_error(inode, true);
+
+ return ret;
+}
+
+static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file));
+ vm_fault_t ret;
+
+ down_read(&zi->i_mmap_sem);
+ ret = filemap_fault(vmf);
+ up_read(&zi->i_mmap_sem);
+
+ return ret;
+}
+
+static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ vm_fault_t ret;
+
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return VM_FAULT_SIGBUS;
+
+ /*
+ * Sanity check: only conventional zone files can have shared
+ * writeable mappings.
+ */
+ if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV))
+ return VM_FAULT_NOPAGE;
+
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vmf->vma->vm_file);
+
+ /* Serialize against truncates */
+ down_read(&zi->i_mmap_sem);
+ ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
+ up_read(&zi->i_mmap_sem);
+
+ sb_end_pagefault(inode->i_sb);
+ return ret;
+}
+
+static const struct vm_operations_struct zonefs_file_vm_ops = {
+ .fault = zonefs_filemap_fault,
+ .map_pages = filemap_map_pages,
+ .page_mkwrite = zonefs_filemap_page_mkwrite,
+};
+
+static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ /*
+ * Conventional zones accept random writes, so their files can support
+ * shared writable mappings. For sequential zone files, only read
+ * mappings are possible since there are no guarantees for write
+ * ordering between msync() and page cache writeback.
+ */
+ if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ &&
+ (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+ return -EINVAL;
+
+ file_accessed(file);
+ vma->vm_ops = &zonefs_file_vm_ops;
+
+ return 0;
+}
+
+static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
+{
+ loff_t isize = i_size_read(file_inode(file));
+
+ /*
+ * Seeks are limited to below the zone size for conventional zones
+ * and below the zone write pointer for sequential zones. In both
+ * cases, this limit is the inode size.
+ */
+ return generic_file_llseek_size(file, offset, whence, isize, isize);
+}
+
+static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
+ int error, unsigned int flags)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+ if (error) {
+ zonefs_io_error(inode, true);
+ return error;
+ }
+
+ if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) {
+ /*
+ * Note that we may be seeing completions out of order,
+ * but that is not a problem since a write completed
+ * successfully necessarily means that all preceding writes
+ * were also successful. So we can safely increase the inode
+ * size to the write end location.
+ */
+ mutex_lock(&zi->i_truncate_mutex);
+ if (i_size_read(inode) < iocb->ki_pos + size) {
+ zonefs_update_stats(inode, iocb->ki_pos + size);
+ i_size_write(inode, iocb->ki_pos + size);
+ }
+ mutex_unlock(&zi->i_truncate_mutex);
+ }
+
+ return 0;
+}
+
+static const struct iomap_dio_ops zonefs_write_dio_ops = {
+ .end_io = zonefs_file_write_dio_end_io,
+};
+
+/*
+ * Handle direct writes. For sequential zone files, this is the only possible
+ * write path. For these files, check that the user is issuing writes
+ * sequentially from the end of the file. This code assumes that the block layer
+ * delivers write requests to the device in sequential order. This is always the
+ * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE
+ * elevator feature is being used (e.g. mq-deadline). The block layer always
+ * automatically select such an elevator for zoned block devices during the
+ * device initialization.
+ */
+static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ size_t count;
+ ssize_t ret;
+
+ /*
+ * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT
+ * as this can cause write reordering (e.g. the first aio gets EAGAIN
+ * on the inode lock but the second goes through but is now unaligned).
+ */
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb)
+ && (iocb->ki_flags & IOCB_NOWAIT))
+ iocb->ki_flags &= ~IOCB_NOWAIT;
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock(inode);
+ }
+
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto inode_unlock;
+
+ iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
+ count = iov_iter_count(from);
+
+ if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
+ ret = -EINVAL;
+ goto inode_unlock;
+ }
+
+ /* Enforce sequential writes (append only) in sequential zones */
+ mutex_lock(&zi->i_truncate_mutex);
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && iocb->ki_pos != zi->i_wpoffset) {
+ mutex_unlock(&zi->i_truncate_mutex);
+ ret = -EINVAL;
+ goto inode_unlock;
+ }
+ mutex_unlock(&zi->i_truncate_mutex);
+
+ ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
+ &zonefs_write_dio_ops, is_sync_kiocb(iocb));
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
+ (ret > 0 || ret == -EIOCBQUEUED)) {
+ if (ret > 0)
+ count = ret;
+ mutex_lock(&zi->i_truncate_mutex);
+ zi->i_wpoffset += count;
+ mutex_unlock(&zi->i_truncate_mutex);
+ }
+
+inode_unlock:
+ inode_unlock(inode);
+
+ return ret;
+}
+
+static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ ssize_t ret;
+
+ /*
+ * Direct IO writes are mandatory for sequential zone files so that the
+ * write IO issuing order is preserved.
+ */
+ if (zi->i_ztype != ZONEFS_ZTYPE_CNV)
+ return -EIO;
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock(inode);
+ }
+
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto inode_unlock;
+
+ iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
+
+ ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops);
+ if (ret > 0)
+ iocb->ki_pos += ret;
+ else if (ret == -EIO)
+ zonefs_io_error(inode, true);
+
+inode_unlock:
+ inode_unlock(inode);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+
+ return ret;
+}
+
+static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
+ if (sb_rdonly(inode->i_sb))
+ return -EROFS;
+
+ /* Write operations beyond the zone size are not allowed */
+ if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size)
+ return -EFBIG;
+
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return zonefs_file_dio_write(iocb, from);
+
+ return zonefs_file_buffered_write(iocb, from);
+}
+
+static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size,
+ int error, unsigned int flags)
+{
+ if (error) {
+ zonefs_io_error(file_inode(iocb->ki_filp), false);
+ return error;
+ }
+
+ return 0;
+}
+
+static const struct iomap_dio_ops zonefs_read_dio_ops = {
+ .end_io = zonefs_file_read_dio_end_io,
+};
+
+static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ loff_t isize;
+ ssize_t ret;
+
+ /* Offline zones cannot be read */
+ if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777)))
+ return -EPERM;
+
+ if (iocb->ki_pos >= zi->i_max_size)
+ return 0;
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock_shared(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock_shared(inode);
+ }
+
+ /* Limit read operations to written data */
+ mutex_lock(&zi->i_truncate_mutex);
+ isize = i_size_read(inode);
+ if (iocb->ki_pos >= isize) {
+ mutex_unlock(&zi->i_truncate_mutex);
+ ret = 0;
+ goto inode_unlock;
+ }
+ iov_iter_truncate(to, isize - iocb->ki_pos);
+ mutex_unlock(&zi->i_truncate_mutex);
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ size_t count = iov_iter_count(to);
+
+ if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
+ ret = -EINVAL;
+ goto inode_unlock;
+ }
+ file_accessed(iocb->ki_filp);
+ ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops,
+ &zonefs_read_dio_ops, is_sync_kiocb(iocb));
+ } else {
+ ret = generic_file_read_iter(iocb, to);
+ if (ret == -EIO)
+ zonefs_io_error(inode, false);
+ }
+
+inode_unlock:
+ inode_unlock_shared(inode);
+
+ return ret;
+}
+
+static const struct file_operations zonefs_file_operations = {
+ .open = generic_file_open,
+ .fsync = zonefs_file_fsync,
+ .mmap = zonefs_file_mmap,
+ .llseek = zonefs_file_llseek,
+ .read_iter = zonefs_file_read_iter,
+ .write_iter = zonefs_file_write_iter,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
+ .iopoll = iomap_dio_iopoll,
+};
+
+static struct kmem_cache *zonefs_inode_cachep;
+
+static struct inode *zonefs_alloc_inode(struct super_block *sb)
+{
+ struct zonefs_inode_info *zi;
+
+ zi = kmem_cache_alloc(zonefs_inode_cachep, GFP_KERNEL);
+ if (!zi)
+ return NULL;
+
+ inode_init_once(&zi->i_vnode);
+ mutex_init(&zi->i_truncate_mutex);
+ init_rwsem(&zi->i_mmap_sem);
+
+ return &zi->i_vnode;
+}
+
+static void zonefs_free_inode(struct inode *inode)
+{
+ kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode));
+}
+
+/*
+ * File system stat.
+ */
+static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct super_block *sb = dentry->d_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ enum zonefs_ztype t;
+ u64 fsid;
+
+ buf->f_type = ZONEFS_MAGIC;
+ buf->f_bsize = sb->s_blocksize;
+ buf->f_namelen = ZONEFS_NAME_MAX;
+
+ spin_lock(&sbi->s_lock);
+
+ buf->f_blocks = sbi->s_blocks;
+ if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks))
+ buf->f_bfree = 0;
+ else
+ buf->f_bfree = buf->f_blocks - sbi->s_used_blocks;
+ buf->f_bavail = buf->f_bfree;
+
+ for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
+ if (sbi->s_nr_files[t])
+ buf->f_files += sbi->s_nr_files[t] + 1;
+ }
+ buf->f_ffree = 0;
+
+ spin_unlock(&sbi->s_lock);
+
+ fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^
+ le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64));
+ buf->f_fsid.val[0] = (u32)fsid;
+ buf->f_fsid.val[1] = (u32)(fsid >> 32);
+
+ return 0;
+}
+
+enum {
+ Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair,
+ Opt_err,
+};
+
+static const match_table_t tokens = {
+ { Opt_errors_ro, "errors=remount-ro"},
+ { Opt_errors_zro, "errors=zone-ro"},
+ { Opt_errors_zol, "errors=zone-offline"},
+ { Opt_errors_repair, "errors=repair"},
+ { Opt_err, NULL}
+};
+
+static int zonefs_parse_options(struct super_block *sb, char *options)
+{
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ substring_t args[MAX_OPT_ARGS];
+ char *p;
+
+ if (!options)
+ return 0;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_errors_ro:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO;
+ break;
+ case Opt_errors_zro:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO;
+ break;
+ case Opt_errors_zol:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL;
+ break;
+ case Opt_errors_repair:
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK;
+ sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int zonefs_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb);
+
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO)
+ seq_puts(seq, ",errors=remount-ro");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO)
+ seq_puts(seq, ",errors=zone-ro");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL)
+ seq_puts(seq, ",errors=zone-offline");
+ if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR)
+ seq_puts(seq, ",errors=repair");
+
+ return 0;
+}
+
+static int zonefs_remount(struct super_block *sb, int *flags, char *data)
+{
+ sync_filesystem(sb);
+
+ return zonefs_parse_options(sb, data);
+}
+
+static const struct super_operations zonefs_sops = {
+ .alloc_inode = zonefs_alloc_inode,
+ .free_inode = zonefs_free_inode,
+ .statfs = zonefs_statfs,
+ .remount_fs = zonefs_remount,
+ .show_options = zonefs_show_options,
+};
+
+static const struct inode_operations zonefs_dir_inode_operations = {
+ .lookup = simple_lookup,
+ .setattr = zonefs_inode_setattr,
+};
+
+static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
+ enum zonefs_ztype type)
+{
+ struct super_block *sb = parent->i_sb;
+
+ inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1;
+ inode_init_owner(inode, parent, S_IFDIR | 0555);
+ inode->i_op = &zonefs_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ set_nlink(inode, 2);
+ inc_nlink(parent);
+}
+
+static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone,
+ enum zonefs_ztype type)
+{
+ struct super_block *sb = inode->i_sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+ inode->i_ino = zone->start >> sbi->s_zone_sectors_shift;
+ inode->i_mode = S_IFREG | sbi->s_perm;
+
+ zi->i_ztype = type;
+ zi->i_zsector = zone->start;
+ zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE,
+ zone->len << SECTOR_SHIFT);
+ zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true);
+
+ inode->i_uid = sbi->s_uid;
+ inode->i_gid = sbi->s_gid;
+ inode->i_size = zi->i_wpoffset;
+ inode->i_blocks = zone->len;
+
+ inode->i_op = &zonefs_file_inode_operations;
+ inode->i_fop = &zonefs_file_operations;
+ inode->i_mapping->a_ops = &zonefs_file_aops;
+
+ sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes);
+ sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits;
+ sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits;
+}
+
+static struct dentry *zonefs_create_inode(struct dentry *parent,
+ const char *name, struct blk_zone *zone,
+ enum zonefs_ztype type)
+{
+ struct inode *dir = d_inode(parent);
+ struct dentry *dentry;
+ struct inode *inode;
+
+ dentry = d_alloc_name(parent, name);
+ if (!dentry)
+ return NULL;
+
+ inode = new_inode(parent->d_sb);
+ if (!inode)
+ goto dput;
+
+ inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
+ if (zone)
+ zonefs_init_file_inode(inode, zone, type);
+ else
+ zonefs_init_dir_inode(dir, inode, type);
+ d_add(dentry, inode);
+ dir->i_size++;
+
+ return dentry;
+
+dput:
+ dput(dentry);
+
+ return NULL;
+}
+
+struct zonefs_zone_data {
+ struct super_block *sb;
+ unsigned int nr_zones[ZONEFS_ZTYPE_MAX];
+ struct blk_zone *zones;
+};
+
+/*
+ * Create a zone group and populate it with zone files.
+ */
+static int zonefs_create_zgroup(struct zonefs_zone_data *zd,
+ enum zonefs_ztype type)
+{
+ struct super_block *sb = zd->sb;
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ struct blk_zone *zone, *next, *end;
+ const char *zgroup_name;
+ char *file_name;
+ struct dentry *dir;
+ unsigned int n = 0;
+ int ret = -ENOMEM;
+
+ /* If the group is empty, there is nothing to do */
+ if (!zd->nr_zones[type])
+ return 0;
+
+ file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL);
+ if (!file_name)
+ return -ENOMEM;
+
+ if (type == ZONEFS_ZTYPE_CNV)
+ zgroup_name = "cnv";
+ else
+ zgroup_name = "seq";
+
+ dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type);
+ if (!dir)
+ goto free;
+
+ /*
+ * The first zone contains the super block: skip it.
+ */
+ end = zd->zones + blkdev_nr_zones(sb->s_bdev->bd_disk);
+ for (zone = &zd->zones[1]; zone < end; zone = next) {
+
+ next = zone + 1;
+ if (zonefs_zone_type(zone) != type)
+ continue;
+
+ /*
+ * For conventional zones, contiguous zones can be aggregated
+ * together to form larger files. Note that this overwrites the
+ * length of the first zone of the set of contiguous zones
+ * aggregated together. If one offline or read-only zone is
+ * found, assume that all zones aggregated have the same
+ * condition.
+ */
+ if (type == ZONEFS_ZTYPE_CNV &&
+ (sbi->s_features & ZONEFS_F_AGGRCNV)) {
+ for (; next < end; next++) {
+ if (zonefs_zone_type(next) != type)
+ break;
+ zone->len += next->len;
+ if (next->cond == BLK_ZONE_COND_READONLY &&
+ zone->cond != BLK_ZONE_COND_OFFLINE)
+ zone->cond = BLK_ZONE_COND_READONLY;
+ else if (next->cond == BLK_ZONE_COND_OFFLINE)
+ zone->cond = BLK_ZONE_COND_OFFLINE;
+ }
+ }
+
+ /*
+ * Use the file number within its group as file name.
+ */
+ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n);
+ if (!zonefs_create_inode(dir, file_name, zone, type))
+ goto free;
+
+ n++;
+ }
+
+ zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
+ zgroup_name, n, n > 1 ? "s" : "");
+
+ sbi->s_nr_files[type] = n;
+ ret = 0;
+
+free:
+ kfree(file_name);
+
+ return ret;
+}
+
+static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ struct zonefs_zone_data *zd = data;
+
+ /*
+ * Count the number of usable zones: the first zone at index 0 contains
+ * the super block and is ignored.
+ */
+ switch (zone->type) {
+ case BLK_ZONE_TYPE_CONVENTIONAL:
+ zone->wp = zone->start + zone->len;
+ if (idx)
+ zd->nr_zones[ZONEFS_ZTYPE_CNV]++;
+ break;
+ case BLK_ZONE_TYPE_SEQWRITE_REQ:
+ case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ if (idx)
+ zd->nr_zones[ZONEFS_ZTYPE_SEQ]++;
+ break;
+ default:
+ zonefs_err(zd->sb, "Unsupported zone type 0x%x\n",
+ zone->type);
+ return -EIO;
+ }
+
+ memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone));
+
+ return 0;
+}
+
+static int zonefs_get_zone_info(struct zonefs_zone_data *zd)
+{
+ struct block_device *bdev = zd->sb->s_bdev;
+ int ret;
+
+ zd->zones = kvcalloc(blkdev_nr_zones(bdev->bd_disk),
+ sizeof(struct blk_zone), GFP_KERNEL);
+ if (!zd->zones)
+ return -ENOMEM;
+
+ /* Get zones information from the device */
+ ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
+ zonefs_get_zone_info_cb, zd);
+ if (ret < 0) {
+ zonefs_err(zd->sb, "Zone report failed %d\n", ret);
+ return ret;
+ }
+
+ if (ret != blkdev_nr_zones(bdev->bd_disk)) {
+ zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n",
+ ret, blkdev_nr_zones(bdev->bd_disk));
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd)
+{
+ kvfree(zd->zones);
+}
+
+/*
+ * Read super block information from the device.
+ */
+static int zonefs_read_super(struct super_block *sb)
+{
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+ struct zonefs_super *super;
+ u32 crc, stored_crc;
+ struct page *page;
+ struct bio_vec bio_vec;
+ struct bio bio;
+ int ret;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ bio_init(&bio, &bio_vec, 1);
+ bio.bi_iter.bi_sector = 0;
+ bio.bi_opf = REQ_OP_READ;
+ bio_set_dev(&bio, sb->s_bdev);
+ bio_add_page(&bio, page, PAGE_SIZE, 0);
+
+ ret = submit_bio_wait(&bio);
+ if (ret)
+ goto free_page;
+
+ super = kmap(page);
+
+ ret = -EINVAL;
+ if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC)
+ goto unmap;
+
+ stored_crc = le32_to_cpu(super->s_crc);
+ super->s_crc = 0;
+ crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super));
+ if (crc != stored_crc) {
+ zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)",
+ crc, stored_crc);
+ goto unmap;
+ }
+
+ sbi->s_features = le64_to_cpu(super->s_features);
+ if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) {
+ zonefs_err(sb, "Unknown features set 0x%llx\n",
+ sbi->s_features);
+ goto unmap;
+ }
+
+ if (sbi->s_features & ZONEFS_F_UID) {
+ sbi->s_uid = make_kuid(current_user_ns(),
+ le32_to_cpu(super->s_uid));
+ if (!uid_valid(sbi->s_uid)) {
+ zonefs_err(sb, "Invalid UID feature\n");
+ goto unmap;
+ }
+ }
+
+ if (sbi->s_features & ZONEFS_F_GID) {
+ sbi->s_gid = make_kgid(current_user_ns(),
+ le32_to_cpu(super->s_gid));
+ if (!gid_valid(sbi->s_gid)) {
+ zonefs_err(sb, "Invalid GID feature\n");
+ goto unmap;
+ }
+ }
+
+ if (sbi->s_features & ZONEFS_F_PERM)
+ sbi->s_perm = le32_to_cpu(super->s_perm);
+
+ if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) {
+ zonefs_err(sb, "Reserved area is being used\n");
+ goto unmap;
+ }
+
+ uuid_copy(&sbi->s_uuid, (uuid_t *)super->s_uuid);
+ ret = 0;
+
+unmap:
+ kunmap(page);
+free_page:
+ __free_page(page);
+
+ return ret;
+}
+
+/*
+ * Check that the device is zoned. If it is, get the list of zones and create
+ * sub-directories and files according to the device zone configuration and
+ * format options.
+ */
+static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct zonefs_zone_data zd;
+ struct zonefs_sb_info *sbi;
+ struct inode *inode;
+ enum zonefs_ztype t;
+ int ret;
+
+ if (!bdev_is_zoned(sb->s_bdev)) {
+ zonefs_err(sb, "Not a zoned block device\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Initialize super block information: the maximum file size is updated
+ * when the zone files are created so that the format option
+ * ZONEFS_F_AGGRCNV which increases the maximum file size of a file
+ * beyond the zone size is taken into account.
+ */
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+
+ spin_lock_init(&sbi->s_lock);
+ sb->s_fs_info = sbi;
+ sb->s_magic = ZONEFS_MAGIC;
+ sb->s_maxbytes = 0;
+ sb->s_op = &zonefs_sops;
+ sb->s_time_gran = 1;
+
+ /*
+ * The block size is set to the device physical sector size to ensure
+ * that write operations on 512e devices (512B logical block and 4KB
+ * physical block) are always aligned to the device physical blocks,
+ * as mandated by the ZBC/ZAC specifications.
+ */
+ sb_set_blocksize(sb, bdev_physical_block_size(sb->s_bdev));
+ sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev));
+ sbi->s_uid = GLOBAL_ROOT_UID;
+ sbi->s_gid = GLOBAL_ROOT_GID;
+ sbi->s_perm = 0640;
+ sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO;
+
+ ret = zonefs_read_super(sb);
+ if (ret)
+ return ret;
+
+ ret = zonefs_parse_options(sb, data);
+ if (ret)
+ return ret;
+
+ memset(&zd, 0, sizeof(struct zonefs_zone_data));
+ zd.sb = sb;
+ ret = zonefs_get_zone_info(&zd);
+ if (ret)
+ goto cleanup;
+
+ zonefs_info(sb, "Mounting %u zones",
+ blkdev_nr_zones(sb->s_bdev->bd_disk));
+
+ /* Create root directory inode */
+ ret = -ENOMEM;
+ inode = new_inode(sb);
+ if (!inode)
+ goto cleanup;
+
+ inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk);
+ inode->i_mode = S_IFDIR | 0555;
+ inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
+ inode->i_op = &zonefs_dir_inode_operations;
+ inode->i_fop = &simple_dir_operations;
+ set_nlink(inode, 2);
+
+ sb->s_root = d_make_root(inode);
+ if (!sb->s_root)
+ goto cleanup;
+
+ /* Create and populate files in zone groups directories */
+ for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) {
+ ret = zonefs_create_zgroup(&zd, t);
+ if (ret)
+ break;
+ }
+
+cleanup:
+ zonefs_cleanup_zone_info(&zd);
+
+ return ret;
+}
+
+static struct dentry *zonefs_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
+{
+ return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super);
+}
+
+static void zonefs_kill_super(struct super_block *sb)
+{
+ struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
+
+ if (sb->s_root)
+ d_genocide(sb->s_root);
+ kill_block_super(sb);
+ kfree(sbi);
+}
+
+/*
+ * File system definition and registration.
+ */
+static struct file_system_type zonefs_type = {
+ .owner = THIS_MODULE,
+ .name = "zonefs",
+ .mount = zonefs_mount,
+ .kill_sb = zonefs_kill_super,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+
+static int __init zonefs_init_inodecache(void)
+{
+ zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache",
+ sizeof(struct zonefs_inode_info), 0,
+ (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ NULL);
+ if (zonefs_inode_cachep == NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+static void zonefs_destroy_inodecache(void)
+{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy the inode cache.
+ */
+ rcu_barrier();
+ kmem_cache_destroy(zonefs_inode_cachep);
+}
+
+static int __init zonefs_init(void)
+{
+ int ret;
+
+ BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
+
+ ret = zonefs_init_inodecache();
+ if (ret)
+ return ret;
+
+ ret = register_filesystem(&zonefs_type);
+ if (ret) {
+ zonefs_destroy_inodecache();
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit zonefs_exit(void)
+{
+ zonefs_destroy_inodecache();
+ unregister_filesystem(&zonefs_type);
+}
+
+MODULE_AUTHOR("Damien Le Moal");
+MODULE_DESCRIPTION("Zone file system for zoned block devices");
+MODULE_LICENSE("GPL");
+module_init(zonefs_init);
+module_exit(zonefs_exit);
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
new file mode 100644
index 000000000000..ad17fef7ce91
--- /dev/null
+++ b/fs/zonefs/zonefs.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Simple zone file system for zoned block devices.
+ *
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ */
+#ifndef __ZONEFS_H__
+#define __ZONEFS_H__
+
+#include <linux/fs.h>
+#include <linux/magic.h>
+#include <linux/uuid.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+
+/*
+ * Maximum length of file names: this only needs to be large enough to fit
+ * the zone group directory names and a decimal zone number for file names.
+ * 16 characters is plenty.
+ */
+#define ZONEFS_NAME_MAX 16
+
+/*
+ * Zone types: ZONEFS_ZTYPE_SEQ is used for all sequential zone types
+ * defined in linux/blkzoned.h, that is, BLK_ZONE_TYPE_SEQWRITE_REQ and
+ * BLK_ZONE_TYPE_SEQWRITE_PREF.
+ */
+enum zonefs_ztype {
+ ZONEFS_ZTYPE_CNV,
+ ZONEFS_ZTYPE_SEQ,
+ ZONEFS_ZTYPE_MAX,
+};
+
+static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone)
+{
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return ZONEFS_ZTYPE_CNV;
+ return ZONEFS_ZTYPE_SEQ;
+}
+
+/*
+ * In-memory inode data.
+ */
+struct zonefs_inode_info {
+ struct inode i_vnode;
+
+ /* File zone type */
+ enum zonefs_ztype i_ztype;
+
+ /* File zone start sector (512B unit) */
+ sector_t i_zsector;
+
+ /* File zone write pointer position (sequential zones only) */
+ loff_t i_wpoffset;
+
+ /* File maximum size */
+ loff_t i_max_size;
+
+ /*
+ * To serialise fully against both syscall and mmap based IO and
+ * sequential file truncation, two locks are used. For serializing
+ * zonefs_seq_file_truncate() against zonefs_iomap_begin(), that is,
+ * file truncate operations against block mapping, i_truncate_mutex is
+ * used. i_truncate_mutex also protects against concurrent accesses
+ * and changes to the inode private data, and in particular changes to
+ * a sequential file size on completion of direct IO writes.
+ * Serialization of mmap read IOs with truncate and syscall IO
+ * operations is done with i_mmap_sem in addition to i_truncate_mutex.
+ * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first,
+ * i_truncate_mutex second).
+ */
+ struct mutex i_truncate_mutex;
+ struct rw_semaphore i_mmap_sem;
+};
+
+static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode)
+{
+ return container_of(inode, struct zonefs_inode_info, i_vnode);
+}
+
+/*
+ * On-disk super block (block 0).
+ */
+#define ZONEFS_LABEL_LEN 64
+#define ZONEFS_UUID_SIZE 16
+#define ZONEFS_SUPER_SIZE 4096
+
+struct zonefs_super {
+
+ /* Magic number */
+ __le32 s_magic;
+
+ /* Checksum */
+ __le32 s_crc;
+
+ /* Volume label */
+ char s_label[ZONEFS_LABEL_LEN];
+
+ /* 128-bit uuid */
+ __u8 s_uuid[ZONEFS_UUID_SIZE];
+
+ /* Features */
+ __le64 s_features;
+
+ /* UID/GID to use for files */
+ __le32 s_uid;
+ __le32 s_gid;
+
+ /* File permissions */
+ __le32 s_perm;
+
+ /* Padding to ZONEFS_SUPER_SIZE bytes */
+ __u8 s_reserved[3988];
+
+} __packed;
+
+/*
+ * Feature flags: specified in the s_features field of the on-disk super
+ * block struct zonefs_super and in-memory in the s_feartures field of
+ * struct zonefs_sb_info.
+ */
+enum zonefs_features {
+ /*
+ * Aggregate contiguous conventional zones into a single file.
+ */
+ ZONEFS_F_AGGRCNV = 1ULL << 0,
+ /*
+ * Use super block specified UID for files instead of default 0.
+ */
+ ZONEFS_F_UID = 1ULL << 1,
+ /*
+ * Use super block specified GID for files instead of default 0.
+ */
+ ZONEFS_F_GID = 1ULL << 2,
+ /*
+ * Use super block specified file permissions instead of default 640.
+ */
+ ZONEFS_F_PERM = 1ULL << 3,
+};
+
+#define ZONEFS_F_DEFINED_FEATURES \
+ (ZONEFS_F_AGGRCNV | ZONEFS_F_UID | ZONEFS_F_GID | ZONEFS_F_PERM)
+
+/*
+ * Mount options for zone write pointer error handling.
+ */
+#define ZONEFS_MNTOPT_ERRORS_RO (1 << 0) /* Make zone file readonly */
+#define ZONEFS_MNTOPT_ERRORS_ZRO (1 << 1) /* Make zone file offline */
+#define ZONEFS_MNTOPT_ERRORS_ZOL (1 << 2) /* Make zone file offline */
+#define ZONEFS_MNTOPT_ERRORS_REPAIR (1 << 3) /* Remount read-only */
+#define ZONEFS_MNTOPT_ERRORS_MASK \
+ (ZONEFS_MNTOPT_ERRORS_RO | ZONEFS_MNTOPT_ERRORS_ZRO | \
+ ZONEFS_MNTOPT_ERRORS_ZOL | ZONEFS_MNTOPT_ERRORS_REPAIR)
+
+/*
+ * In-memory Super block information.
+ */
+struct zonefs_sb_info {
+
+ unsigned long s_mount_opts;
+
+ spinlock_t s_lock;
+
+ unsigned long long s_features;
+ kuid_t s_uid;
+ kgid_t s_gid;
+ umode_t s_perm;
+ uuid_t s_uuid;
+ unsigned int s_zone_sectors_shift;
+
+ unsigned int s_nr_files[ZONEFS_ZTYPE_MAX];
+
+ loff_t s_blocks;
+ loff_t s_used_blocks;
+};
+
+static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+#define zonefs_info(sb, format, args...) \
+ pr_info("zonefs (%s): " format, sb->s_id, ## args)
+#define zonefs_err(sb, format, args...) \
+ pr_err("zonefs (%s) ERROR: " format, sb->s_id, ## args)
+#define zonefs_warn(sb, format, args...) \
+ pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args)
+
+#endif
OpenPOWER on IntegriCloud