diff options
Diffstat (limited to 'fs')
38 files changed, 5165 insertions, 130 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 7b623e9fc1b0..708ba336e689 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -40,6 +40,7 @@ source "fs/ocfs2/Kconfig" source "fs/btrfs/Kconfig" source "fs/nilfs2/Kconfig" source "fs/f2fs/Kconfig" +source "fs/zonefs/Kconfig" config FS_DAX bool "Direct Access (DAX) support" @@ -264,6 +265,7 @@ source "fs/pstore/Kconfig" source "fs/sysv/Kconfig" source "fs/ufs/Kconfig" source "fs/erofs/Kconfig" +source "fs/vboxsf/Kconfig" endif # MISC_FILESYSTEMS diff --git a/fs/Makefile b/fs/Makefile index 98be354fdb61..505e51166973 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -133,3 +133,5 @@ obj-$(CONFIG_CEPH_FS) += ceph/ obj-$(CONFIG_PSTORE) += pstore/ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ obj-$(CONFIG_EROFS_FS) += erofs/ +obj-$(CONFIG_VBOXSF_FS) += vboxsf/ +obj-$(CONFIG_ZONEFS_FS) += zonefs/ diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h index 0f0dc1c1fe41..153d5c842a9b 100644 --- a/fs/cifs/cifs_ioctl.h +++ b/fs/cifs/cifs_ioctl.h @@ -65,6 +65,11 @@ struct smb3_key_debug_info { __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; } __packed; +struct smb3_notify { + __u32 completion_filter; + bool watch_tree; +} __packed; + #define CIFS_IOCTL_MAGIC 0xCF #define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) #define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4) @@ -72,3 +77,4 @@ struct smb3_key_debug_info { #define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array) #define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info) #define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info) +#define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index fb41e51dd574..440828afcdde 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1084,7 +1084,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, struct cifs_ntsd *pntsd = NULL; int oplock = 0; unsigned int xid; - int rc, create_options = 0; + int rc; struct cifs_tcon *tcon; struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); struct cifs_fid fid; @@ -1096,13 +1096,10 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = READ_CONTROL; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -1147,7 +1144,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, { int oplock = 0; unsigned int xid; - int rc, access_flags, create_options = 0; + int rc, access_flags; struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); @@ -1160,9 +1157,6 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) access_flags = WRITE_OWNER; else @@ -1171,7 +1165,7 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = access_flags; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5492b9860baa..febab27cd838 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -275,7 +275,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_ffree = 0; /* unlimited */ if (server->ops->queryfs) - rc = server->ops->queryfs(xid, tcon, buf); + rc = server->ops->queryfs(xid, tcon, cifs_sb, buf); free_xid(xid); return 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 239338d57086..de82cfa44b1a 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -298,7 +298,8 @@ struct smb_version_operations { const char *, struct dfs_info3_param **, unsigned int *, const struct nls_table *, int); /* informational QFS call */ - void (*qfs_tcon)(const unsigned int, struct cifs_tcon *); + void (*qfs_tcon)(const unsigned int, struct cifs_tcon *, + struct cifs_sb_info *); /* check if a path is accessible or not */ int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *); @@ -409,7 +410,7 @@ struct smb_version_operations { struct cifsInodeInfo *); /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, - struct kstatfs *); + struct cifs_sb_info *, struct kstatfs *); /* send mandatory brlock to the server */ int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, __u64, __u32, int, int, bool); @@ -430,6 +431,8 @@ struct smb_version_operations { struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *src_file, void __user *); + int (*notify)(const unsigned int xid, struct file *pfile, + void __user *pbuf); int (*query_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); @@ -490,6 +493,7 @@ struct smb_version_operations { /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p); /* make unix special files (block, char, fifo, socket) */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 948bf3474db1..89eaaf46d1ca 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -612,4 +612,12 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, } #endif +static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) +{ + if (cifs_sb && (backup_cred(cifs_sb))) + return options | CREATE_OPEN_BACKUP_INTENT; + else + return options; +} + #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a481296f417f..3c89569e7210 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -260,7 +260,7 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) if (server->tcpStatus != CifsNeedReconnect) break; - if (--retries) + if (retries && --retries) continue; /* diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0aa3623ae0e1..a941ac7a659d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4365,7 +4365,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb, /* do not care if a following call succeed - informational */ if (!tcon->pipe && server->ops->qfs_tcon) { - server->ops->qfs_tcon(*xid, tcon); + server->ops->qfs_tcon(*xid, tcon, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & cpu_to_le32(FILE_READ_ONLY_DEVICE)) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f3b79012ff29..0ef099442f20 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -355,13 +355,10 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (!tcon->unix_ext && (mode & S_IWUGO) == 0) create_options |= CREATE_OPTION_READONLY; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a4e8f7d445ac..bc9516ab4b34 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -222,9 +222,6 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (!buf) return -ENOMEM; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -235,7 +232,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = fid; @@ -752,9 +749,6 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) desired_access = cifs_convert_flags(cfile->f_flags); - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - /* O_SYNC also has bit for O_DSYNC so following check picks up either */ if (cfile->f_flags & O_SYNC) create_options |= CREATE_WRITE_THROUGH; @@ -768,7 +762,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.disposition = disposition; oparms.path = full_path; oparms.fid = &cfile->fid; @@ -2599,8 +2593,10 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); rc = file_write_and_wait_range(file, start, end); - if (rc) + if (rc) { + trace_cifs_fsync_err(inode->i_ino, rc); return rc; + } xid = get_xid(); @@ -2638,8 +2634,10 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); rc = file_write_and_wait_range(file, start, end); - if (rc) + if (rc) { + trace_cifs_fsync_err(file_inode(file)->i_ino, rc); return rc; + } xid = get_xid(); @@ -2672,7 +2670,8 @@ int cifs_flush(struct file *file, fl_owner_t id) rc = filemap_write_and_wait(inode->i_mapping); cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); - + if (rc) + trace_cifs_flush_err(inode->i_ino, rc); return rc; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 676e96a7a9f0..9ba623b601ec 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -475,9 +475,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -1285,7 +1283,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = DELETE | FILE_WRITE_ATTRIBUTES; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -1823,7 +1821,7 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, oparms.cifs_sb = cifs_sb; /* open the file to be renamed -- we need DELETE perms */ oparms.desired_access = DELETE; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = from_path; oparms.fid = &fid; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 1a01e108d75e..4a73e63c4d43 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -65,7 +65,7 @@ static long cifs_ioctl_query_info(unsigned int xid, struct file *filep, if (tcon->ses->server->ops->ioctl_query_info) rc = tcon->ses->server->ops->ioctl_query_info( - xid, tcon, utf16_path, + xid, tcon, cifs_sb, utf16_path, filep->private_data ? 0 : 1, p); else rc = -EOPNOTSUPP; @@ -169,6 +169,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) unsigned int xid; struct cifsFileInfo *pSMBFile = filep->private_data; struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; __u64 ExtAttrBits = 0; __u64 caps; @@ -299,6 +300,21 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) else rc = 0; break; + case CIFS_IOC_NOTIFY: + if (!S_ISDIR(inode->i_mode)) { + /* Notify can only be done on directories */ + rc = -EOPNOTSUPP; + break; + } + cifs_sb = CIFS_SB(inode->i_sb); + tcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); + if (tcon && tcon->ses->server->ops->notify) { + rc = tcon->ses->server->ops->notify(xid, + filep, (void __user *)arg); + cifs_dbg(FYI, "ioctl notify rc %d\n", rc); + } else + rc = -EOPNOTSUPP; + break; default: cifs_dbg(FYI, "unsupported ioctl\n"); break; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index b736acd3917b..852aa00ec729 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -315,7 +315,7 @@ cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -353,15 +353,11 @@ cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifs_io_parms io_parms; - int create_options = CREATE_NOT_DIR; - - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_CREATE; oparms.path = path; oparms.fid = &fid; @@ -402,9 +398,7 @@ smb3_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_READ; - oparms.create_options = CREATE_NOT_DIR; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.fid = &fid; oparms.reconnect = false; @@ -457,14 +451,10 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; struct cifs_open_parms oparms; struct cifs_io_parms io_parms; - int create_options = CREATE_NOT_DIR; __le16 *utf16_path; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; struct kvec iov[2]; - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - cifs_dbg(FYI, "%s: path: %s\n", __func__, path); utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); @@ -474,7 +464,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d17587c2c4ab..ba9dadf3be24 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -196,7 +196,8 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * may look wrong since the inodes may not have timed out by the time * "ls" does a stat() call on them. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f0795c856d8f..43a88e26d26b 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -101,7 +101,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses) iface_count = ses->iface_count; if (iface_count <= 0) { spin_unlock(&ses->iface_lock); - cifs_dbg(FYI, "no iface list available to open channels\n"); + cifs_dbg(VFS, "no iface list available to open channels\n"); return 0; } ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces), diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index d70a2bb062df..eb994e313c6a 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -504,7 +504,8 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info) } static void -cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +cifs_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { CIFSSMBQFSDeviceInfo(xid, tcon); CIFSSMBQFSAttributeInfo(xid, tcon); @@ -565,7 +566,7 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -793,7 +794,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = SYNCHRONIZE | FILE_WRITE_ATTRIBUTES; - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -872,7 +873,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid, static int cifs_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc = -EOPNOTSUPP; @@ -970,7 +971,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; - oparms.create_options = OPEN_REPARSE_POINT; + oparms.create_options = cifs_create_options(cifs_sb, + OPEN_REPARSE_POINT); oparms.disposition = FILE_OPEN; oparms.path = full_path; oparms.fid = &fid; @@ -1029,7 +1031,6 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; FILE_ALL_INFO *buf = NULL; struct cifs_io_parms io_parms; __u32 oplock = 0; @@ -1090,13 +1091,11 @@ cifs_make_node(unsigned int xid, struct inode *inode, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL); oparms.disposition = FILE_CREATE; oparms.path = full_path; oparms.fid = &fid; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 5ef5e97a6d13..1cf207564ff9 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -99,9 +99,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = desired_access; oparms.disposition = create_disposition; - oparms.create_options = create_options; - if (backup_cred(cifs_sb)) - oparms.create_options |= CREATE_OPEN_BACKUP_INTENT; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; oparms.mode = mode; @@ -457,7 +455,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, /* If it is a root and its handle is cached then use it */ if (!strlen(full_path) && !no_cached_open) { - rc = open_shroot(xid, tcon, &fid); + rc = open_shroot(xid, tcon, cifs_sb, &fid); if (rc) goto out; @@ -474,9 +472,6 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - cifs_get_readable_path(tcon, full_path, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 6787fce26f20..baa825f4cec0 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -655,7 +655,8 @@ smb2_cached_lease_break(struct work_struct *work) /* * Open the directory at the root of a share */ -int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) +int open_shroot(unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid) { struct cifs_ses *ses = tcon->ses; struct TCP_Server_Info *server = ses->server; @@ -702,7 +703,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid) rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE; oparms.tcon = tcon; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; oparms.fid = pfid; @@ -818,7 +819,8 @@ oshr_free: } static void -smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -830,7 +832,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -838,7 +840,7 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL, NULL); else - rc = open_shroot(xid, tcon, &fid); + rc = open_shroot(xid, tcon, cifs_sb, &fid); if (rc) return; @@ -860,7 +862,8 @@ smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) } static void -smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) +smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -871,7 +874,7 @@ smb2_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon) oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -906,10 +909,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -1151,10 +1151,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_WRITE_EA; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -1422,6 +1419,7 @@ req_res_key_exit: static int smb2_ioctl_query_info(const unsigned int xid, struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p) { @@ -1447,6 +1445,7 @@ smb2_ioctl_query_info(const unsigned int xid, struct kvec close_iov[1]; unsigned int size[2]; void *data[2]; + int create_options = is_dir ? CREATE_NOT_FILE : CREATE_NOT_DIR; memset(rqst, 0, sizeof(rqst)); resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER; @@ -1477,10 +1476,7 @@ smb2_ioctl_query_info(const unsigned int xid, memset(&oparms, 0, sizeof(oparms)); oparms.tcon = tcon; oparms.disposition = FILE_OPEN; - if (is_dir) - oparms.create_options = CREATE_NOT_FILE; - else - oparms.create_options = CREATE_NOT_DIR; + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; @@ -2049,6 +2045,66 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, return rc; } + + +static int +smb3_notify(const unsigned int xid, struct file *pfile, + void __user *ioc_buf) +{ + struct smb3_notify notify; + struct dentry *dentry = pfile->f_path.dentry; + struct inode *inode = file_inode(pfile); + struct cifs_sb_info *cifs_sb; + struct cifs_open_parms oparms; + struct cifs_fid fid; + struct cifs_tcon *tcon; + unsigned char *path = NULL; + __le16 *utf16_path = NULL; + u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + int rc = 0; + + path = build_path_from_dentry(dentry); + if (path == NULL) + return -ENOMEM; + + cifs_sb = CIFS_SB(inode->i_sb); + + utf16_path = cifs_convert_path_to_utf16(path + 1, cifs_sb); + if (utf16_path == NULL) { + rc = -ENOMEM; + goto notify_exit; + } + + if (copy_from_user(¬ify, ioc_buf, sizeof(struct smb3_notify))) { + rc = -EFAULT; + goto notify_exit; + } + + tcon = cifs_sb_master_tcon(cifs_sb); + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = cifs_create_options(cifs_sb, 0); + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL); + if (rc) + goto notify_exit; + + rc = SMB2_change_notify(xid, tcon, fid.persistent_fid, fid.volatile_fid, + notify.watch_tree, notify.completion_filter); + + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + + cifs_dbg(FYI, "change notify for path %s rc %d\n", path, rc); + +notify_exit: + kfree(path); + kfree(utf16_path); + return rc; +} + static int smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, const char *path, struct cifs_sb_info *cifs_sb, @@ -2086,10 +2142,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA; oparms.disposition = FILE_OPEN; - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = fid; oparms.reconnect = false; @@ -2343,10 +2396,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = desired_access; oparms.disposition = FILE_OPEN; - if (cifs_sb && backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2402,7 +2452,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, static int smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { struct smb2_query_info_rsp *rsp; struct smb2_fs_full_size_info *info = NULL; @@ -2417,7 +2467,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, FS_FULL_SIZE_INFORMATION, SMB2_O_INFO_FILESYSTEM, sizeof(struct smb2_fs_full_size_info), - &rsp_iov, &buftype, NULL); + &rsp_iov, &buftype, cifs_sb); if (rc) goto qfs_exit; @@ -2439,7 +2489,7 @@ qfs_exit: static int smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, - struct kstatfs *buf) + struct cifs_sb_info *cifs_sb, struct kstatfs *buf) { int rc; __le16 srch_path = 0; /* Null - open root of share */ @@ -2448,12 +2498,12 @@ smb311_queryfs(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; if (!tcon->posix_extensions) - return smb2_queryfs(xid, tcon, buf); + return smb2_queryfs(xid, tcon, cifs_sb, buf); oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - oparms.create_options = 0; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2722,6 +2772,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, struct smb2_create_rsp *create_rsp; struct smb2_ioctl_rsp *ioctl_rsp; struct reparse_data_buffer *reparse_buf; + int create_options = is_reparse_point ? OPEN_REPARSE_POINT : 0; u32 plen; cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); @@ -2748,14 +2799,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, oparms.tcon = tcon; oparms.desired_access = FILE_READ_ATTRIBUTES; oparms.disposition = FILE_OPEN; - - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - if (is_reparse_point) - oparms.create_options = OPEN_REPARSE_POINT; - + oparms.create_options = cifs_create_options(cifs_sb, create_options); oparms.fid = &fid; oparms.reconnect = false; @@ -2934,11 +2978,6 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) { rc = -ENOMEM; @@ -2949,6 +2988,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb, oparms.tcon = tcon; oparms.desired_access = READ_CONTROL; oparms.disposition = FILE_OPEN; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.fid = &fid; oparms.reconnect = false; @@ -2990,11 +3030,6 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, tcon = tlink_tcon(tlink); xid = get_xid(); - if (backup_cred(cifs_sb)) - oparms.create_options = CREATE_OPEN_BACKUP_INTENT; - else - oparms.create_options = 0; - if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP) access_flags = WRITE_OWNER; else @@ -3009,6 +3044,7 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen, oparms.tcon = tcon; oparms.desired_access = access_flags; + oparms.create_options = cifs_create_options(cifs_sb, 0); oparms.disposition = FILE_OPEN; oparms.path = path; oparms.fid = &fid; @@ -4491,7 +4527,6 @@ smb2_make_node(unsigned int xid, struct inode *inode, { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); int rc = -EPERM; - int create_options = CREATE_NOT_DIR | CREATE_OPTION_SPECIAL; FILE_ALL_INFO *buf = NULL; struct cifs_io_parms io_parms; __u32 oplock = 0; @@ -4527,13 +4562,11 @@ smb2_make_node(unsigned int xid, struct inode *inode, goto out; } - if (backup_cred(cifs_sb)) - create_options |= CREATE_OPEN_BACKUP_INTENT; - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; - oparms.create_options = create_options; + oparms.create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR | + CREATE_OPTION_SPECIAL); oparms.disposition = FILE_CREATE; oparms.path = full_path; oparms.fid = &fid; @@ -4868,6 +4901,7 @@ struct smb_version_operations smb30_operations = { .dir_needs_close = smb2_dir_needs_close, .fallocate = smb3_fallocate, .enum_snapshots = smb3_enum_snapshots, + .notify = smb3_notify, .init_transform_rq = smb3_init_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, @@ -4978,6 +5012,7 @@ struct smb_version_operations smb311_operations = { .dir_needs_close = smb2_dir_needs_close, .fallocate = smb3_fallocate, .enum_snapshots = smb3_enum_snapshots, + .notify = smb3_notify, .init_transform_rq = smb3_init_transform_rq, .is_transform_hdr = smb3_is_transform_hdr, .receive_transform = smb3_receive_transform, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 14f209f7376f..1234f9ccab03 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -350,9 +350,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } rc = cifs_negotiate_protocol(0, tcon->ses); - if (!rc && tcon->ses->need_reconnect) + if (!rc && tcon->ses->need_reconnect) { rc = cifs_setup_session(0, tcon->ses, nls_codepage); - + if ((rc == -EACCES) && !tcon->retry) { + rc = -EHOSTDOWN; + mutex_unlock(&tcon->ses->session_mutex); + goto failed; + } + } if (rc || !tcon->need_reconnect) { mutex_unlock(&tcon->ses->session_mutex); goto out; @@ -397,6 +402,7 @@ out: case SMB2_SET_INFO: rc = -EAGAIN; } +failed: unload_nls(nls_codepage); return rc; } @@ -1933,6 +1939,16 @@ parse_query_id_ctxt(struct create_context *cc, struct smb2_file_all_info *buf) buf->IndexNumber = pdisk_id->DiskFileId; } +static void +parse_posix_ctxt(struct create_context *cc, struct smb_posix_info *pposix_inf) +{ + /* struct smb_posix_info *ppinf = (struct smb_posix_info *)cc; */ + + /* TODO: Need to add parsing for the context and return */ + printk_once(KERN_WARNING + "SMB3 3.11 POSIX response context not completed yet\n"); +} + void smb2_parse_contexts(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, @@ -1944,6 +1960,9 @@ smb2_parse_contexts(struct TCP_Server_Info *server, unsigned int next; unsigned int remaining; char *name; + const char smb3_create_tag_posix[] = {0x93, 0xAD, 0x25, 0x50, 0x9C, + 0xB4, 0x11, 0xE7, 0xB4, 0x23, 0x83, + 0xDE, 0x96, 0x8B, 0xCD, 0x7C}; *oplock = 0; data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset); @@ -1963,6 +1982,15 @@ smb2_parse_contexts(struct TCP_Server_Info *server, else if (buf && (le16_to_cpu(cc->NameLength) == 4) && strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0) parse_query_id_ctxt(cc, buf); + else if ((le16_to_cpu(cc->NameLength) == 16)) { + if (memcmp(name, smb3_create_tag_posix, 16) == 0) + parse_posix_ctxt(cc, NULL); + } + /* else { + cifs_dbg(FYI, "Context not matched with len %d\n", + le16_to_cpu(cc->NameLength)); + cifs_dump_mem("Cctxt name: ", name, 4); + } */ next = le32_to_cpu(cc->Next); if (!next) @@ -3357,6 +3385,7 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst, req->PersistentFileId = persistent_fid; req->VolatileFileId = volatile_fid; + /* See note 354 of MS-SMB2, 64K max */ req->OutputBufferLength = cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE); req->CompletionFilter = cpu_to_le32(completion_filter); @@ -4023,6 +4052,9 @@ smb2_writev_callback(struct mid_q_entry *mid) wdata->cfile->fid.persistent_fid, tcon->tid, tcon->ses->Suid, wdata->offset, wdata->bytes, wdata->result); + if (wdata->result == -ENOSPC) + printk_once(KERN_WARNING "Out of space writing to %s\n", + tcon->treeName); } else trace_smb3_write_done(0 /* no xid */, wdata->cfile->fid.persistent_fid, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 4c43dbd1e089..fa03df130f1a 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -1519,6 +1519,7 @@ struct smb3_fs_vol_info { #define FILE_NORMALIZED_NAME_INFORMATION 48 #define FILEID_GLOBAL_TX_DIRECTORY_INFORMATION 50 #define FILE_STANDARD_LINK_INFORMATION 54 +#define FILE_ID_INFORMATION 59 struct smb2_file_internal_info { __le64 IndexNumber; @@ -1593,6 +1594,21 @@ struct smb2_file_network_open_info { __le32 Reserved; } __packed; /* level 34 Query also similar returned in close rsp and open rsp */ +/* See MS-FSCC 2.4.43 */ +struct smb2_file_id_information { + __le64 VolumeSerialNumber; + __u64 PersistentFileId; /* opaque endianness */ + __u64 VolatileFileId; /* opaque endianness */ +} __packed; /* level 59 */ + extern char smb2_padding[7]; +/* equivalent of the contents of SMB3.1.1 POSIX open context response */ +struct smb_posix_info { + __le32 nlink; + __le32 reparse_tag; + __le32 mode; + kuid_t uid; + kuid_t gid; +}; #endif /* _SMB2PDU_H */ diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 6c678e00046f..de6388ef344f 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -68,7 +68,7 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon, - struct cifs_fid *pfid); + struct cifs_sb_info *cifs_sb, struct cifs_fid *pfid); extern void close_shroot(struct cached_fid *cfid); extern void close_shroot_lease(struct cached_fid *cfid); extern void close_shroot_lease_locked(struct cached_fid *cfid); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index fe6acfce3390..08b703b7a15e 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -104,13 +104,14 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) { struct cifs_chan *chan; struct cifs_ses *ses = NULL; + struct TCP_Server_Info *it = NULL; int i; int rc = 0; spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(it, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses, &it->smb_ses_list, smb_ses_list) { if (ses->Suid == ses_id) goto found; } diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index e7e350b13d6a..4cb0d5f7ce45 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -547,6 +547,33 @@ DEFINE_EVENT(smb3_exit_err_class, smb3_##name, \ DEFINE_SMB3_EXIT_ERR_EVENT(exit_err); + +DECLARE_EVENT_CLASS(smb3_sync_err_class, + TP_PROTO(unsigned long ino, + int rc), + TP_ARGS(ino, rc), + TP_STRUCT__entry( + __field(unsigned long, ino) + __field(int, rc) + ), + TP_fast_assign( + __entry->ino = ino; + __entry->rc = rc; + ), + TP_printk("\tino=%lu rc=%d", + __entry->ino, __entry->rc) +) + +#define DEFINE_SMB3_SYNC_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_sync_err_class, cifs_##name, \ + TP_PROTO(unsigned long ino, \ + int rc), \ + TP_ARGS(ino, rc)) + +DEFINE_SMB3_SYNC_ERR_EVENT(fsync_err); +DEFINE_SMB3_SYNC_ERR_EVENT(flush_err); + + DECLARE_EVENT_CLASS(smb3_enter_exit_class, TP_PROTO(unsigned int xid, const char *func_name), diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile index d46e9baee285..b88aecc86550 100644 --- a/fs/unicode/Makefile +++ b/fs/unicode/Makefile @@ -35,4 +35,4 @@ $(obj)/utf8data.h: $(src)/utf8data.h_shipped FORCE endif targets += utf8data.h -hostprogs-y += mkutf8data +hostprogs += mkutf8data diff --git a/fs/vboxsf/Kconfig b/fs/vboxsf/Kconfig new file mode 100644 index 000000000000..b84586ae08b3 --- /dev/null +++ b/fs/vboxsf/Kconfig @@ -0,0 +1,10 @@ +config VBOXSF_FS + tristate "VirtualBox guest shared folder (vboxsf) support" + depends on X86 && VBOXGUEST + select NLS + help + VirtualBox hosts can share folders with guests, this driver + implements the Linux-guest side of this allowing folders exported + by the host to be mounted under Linux. + + If you want to use shared folders in VirtualBox guests, answer Y or M. diff --git a/fs/vboxsf/Makefile b/fs/vboxsf/Makefile new file mode 100644 index 000000000000..9e4328e79623 --- /dev/null +++ b/fs/vboxsf/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: MIT + +obj-$(CONFIG_VBOXSF_FS) += vboxsf.o + +vboxsf-y := dir.o file.o utils.o vboxsf_wrappers.o super.o diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c new file mode 100644 index 000000000000..dd147b490982 --- /dev/null +++ b/fs/vboxsf/dir.c @@ -0,0 +1,427 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Directory inode and file operations + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/namei.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +static int vboxsf_dir_open(struct inode *inode, struct file *file) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_createparms params = {}; + struct vboxsf_dir_info *sf_d; + int err; + + sf_d = vboxsf_dir_info_alloc(); + if (!sf_d) + return -ENOMEM; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_DIRECTORY | SHFL_CF_ACT_OPEN_IF_EXISTS | + SHFL_CF_ACT_FAIL_IF_NEW | SHFL_CF_ACCESS_READ; + + err = vboxsf_create_at_dentry(file_dentry(file), ¶ms); + if (err) + goto err_free_dir_info; + + if (params.result != SHFL_FILE_EXISTS) { + err = -ENOENT; + goto err_close; + } + + err = vboxsf_dir_read_all(sbi, sf_d, params.handle); + if (err) + goto err_close; + + vboxsf_close(sbi->root, params.handle); + file->private_data = sf_d; + return 0; + +err_close: + vboxsf_close(sbi->root, params.handle); +err_free_dir_info: + vboxsf_dir_info_free(sf_d); + return err; +} + +static int vboxsf_dir_release(struct inode *inode, struct file *file) +{ + if (file->private_data) + vboxsf_dir_info_free(file->private_data); + + return 0; +} + +static unsigned int vboxsf_get_d_type(u32 mode) +{ + unsigned int d_type; + + switch (mode & SHFL_TYPE_MASK) { + case SHFL_TYPE_FIFO: + d_type = DT_FIFO; + break; + case SHFL_TYPE_DEV_CHAR: + d_type = DT_CHR; + break; + case SHFL_TYPE_DIRECTORY: + d_type = DT_DIR; + break; + case SHFL_TYPE_DEV_BLOCK: + d_type = DT_BLK; + break; + case SHFL_TYPE_FILE: + d_type = DT_REG; + break; + case SHFL_TYPE_SYMLINK: + d_type = DT_LNK; + break; + case SHFL_TYPE_SOCKET: + d_type = DT_SOCK; + break; + case SHFL_TYPE_WHITEOUT: + d_type = DT_WHT; + break; + default: + d_type = DT_UNKNOWN; + break; + } + return d_type; +} + +static bool vboxsf_dir_emit(struct file *dir, struct dir_context *ctx) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(file_inode(dir)->i_sb); + struct vboxsf_dir_info *sf_d = dir->private_data; + struct shfl_dirinfo *info; + struct vboxsf_dir_buf *b; + unsigned int d_type; + loff_t i, cur = 0; + ino_t fake_ino; + void *end; + int err; + + list_for_each_entry(b, &sf_d->info_list, head) { +try_next_entry: + if (ctx->pos >= cur + b->entries) { + cur += b->entries; + continue; + } + + /* + * Note the vboxsf_dir_info objects we are iterating over here + * are variable sized, so the info pointer may end up being + * unaligned. This is how we get the data from the host. + * Since vboxsf is only supported on x86 machines this is not + * a problem. + */ + for (i = 0, info = b->buf; i < ctx->pos - cur; i++) { + end = &info->name.string.utf8[info->name.size]; + /* Only happens if the host gives us corrupt data */ + if (WARN_ON(end > (b->buf + b->used))) + return false; + info = end; + } + + end = &info->name.string.utf8[info->name.size]; + if (WARN_ON(end > (b->buf + b->used))) + return false; + + /* Info now points to the right entry, emit it. */ + d_type = vboxsf_get_d_type(info->info.attr.mode); + + /* + * On 32 bit systems pos is 64 signed, while ino is 32 bit + * unsigned so fake_ino may overflow, check for this. + */ + if ((ino_t)(ctx->pos + 1) != (u64)(ctx->pos + 1)) { + vbg_err("vboxsf: fake ino overflow, truncating dir\n"); + return false; + } + fake_ino = ctx->pos + 1; + + if (sbi->nls) { + char d_name[NAME_MAX]; + + err = vboxsf_nlscpy(sbi, d_name, NAME_MAX, + info->name.string.utf8, + info->name.length); + if (err) { + /* skip erroneous entry and proceed */ + ctx->pos += 1; + goto try_next_entry; + } + + return dir_emit(ctx, d_name, strlen(d_name), + fake_ino, d_type); + } + + return dir_emit(ctx, info->name.string.utf8, info->name.length, + fake_ino, d_type); + } + + return false; +} + +static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx) +{ + bool emitted; + + do { + emitted = vboxsf_dir_emit(dir, ctx); + if (emitted) + ctx->pos += 1; + } while (emitted); + + return 0; +} + +const struct file_operations vboxsf_dir_fops = { + .open = vboxsf_dir_open, + .iterate = vboxsf_dir_iterate, + .release = vboxsf_dir_release, + .read = generic_read_dir, + .llseek = generic_file_llseek, +}; + +/* + * This is called during name resolution/lookup to check if the @dentry in + * the cache is still valid. the job is handled by vboxsf_inode_revalidate. + */ +static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags) +{ + if (flags & LOOKUP_RCU) + return -ECHILD; + + if (d_really_is_positive(dentry)) + return vboxsf_inode_revalidate(dentry) == 0; + else + return vboxsf_stat_dentry(dentry, NULL) == -ENOENT; +} + +const struct dentry_operations vboxsf_dentry_ops = { + .d_revalidate = vboxsf_dentry_revalidate +}; + +/* iops */ + +static struct dentry *vboxsf_dir_lookup(struct inode *parent, + struct dentry *dentry, + unsigned int flags) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct shfl_fsobjinfo fsinfo; + struct inode *inode; + int err; + + dentry->d_time = jiffies; + + err = vboxsf_stat_dentry(dentry, &fsinfo); + if (err) { + inode = (err == -ENOENT) ? NULL : ERR_PTR(err); + } else { + inode = vboxsf_new_inode(parent->i_sb); + if (!IS_ERR(inode)) + vboxsf_init_inode(sbi, inode, &fsinfo); + } + + return d_splice_alias(inode, dentry); +} + +static int vboxsf_dir_instantiate(struct inode *parent, struct dentry *dentry, + struct shfl_fsobjinfo *info) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_inode *sf_i; + struct inode *inode; + + inode = vboxsf_new_inode(parent->i_sb); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + sf_i = VBOXSF_I(inode); + /* The host may have given us different attr then requested */ + sf_i->force_restat = 1; + vboxsf_init_inode(sbi, inode, info); + + d_instantiate(dentry, inode); + + return 0; +} + +static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry, + umode_t mode, int is_dir) +{ + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct shfl_createparms params = {}; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_ACT_CREATE_IF_NEW | + SHFL_CF_ACT_FAIL_IF_EXISTS | + SHFL_CF_ACCESS_READWRITE | + (is_dir ? SHFL_CF_DIRECTORY : 0); + params.info.attr.mode = (mode & 0777) | + (is_dir ? SHFL_TYPE_DIRECTORY : SHFL_TYPE_FILE); + params.info.attr.additional = SHFLFSOBJATTRADD_NOTHING; + + err = vboxsf_create_at_dentry(dentry, ¶ms); + if (err) + return err; + + if (params.result != SHFL_FILE_CREATED) + return -EPERM; + + vboxsf_close(sbi->root, params.handle); + + err = vboxsf_dir_instantiate(parent, dentry, ¶ms.info); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + + return 0; +} + +static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry, + umode_t mode, bool excl) +{ + return vboxsf_dir_create(parent, dentry, mode, 0); +} + +static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry, + umode_t mode) +{ + return vboxsf_dir_create(parent, dentry, mode, 1); +} + +static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct inode *inode = d_inode(dentry); + struct shfl_string *path; + u32 flags; + int err; + + if (S_ISDIR(inode->i_mode)) + flags = SHFL_REMOVE_DIR; + else + flags = SHFL_REMOVE_FILE; + + if (S_ISLNK(inode->i_mode)) + flags |= SHFL_REMOVE_SYMLINK; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_remove(sbi->root, path, flags); + __putname(path); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + + return 0; +} + +static int vboxsf_dir_rename(struct inode *old_parent, + struct dentry *old_dentry, + struct inode *new_parent, + struct dentry *new_dentry, + unsigned int flags) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(old_parent->i_sb); + struct vboxsf_inode *sf_old_parent_i = VBOXSF_I(old_parent); + struct vboxsf_inode *sf_new_parent_i = VBOXSF_I(new_parent); + u32 shfl_flags = SHFL_RENAME_FILE | SHFL_RENAME_REPLACE_IF_EXISTS; + struct shfl_string *old_path, *new_path; + int err; + + if (flags) + return -EINVAL; + + old_path = vboxsf_path_from_dentry(sbi, old_dentry); + if (IS_ERR(old_path)) + return PTR_ERR(old_path); + + new_path = vboxsf_path_from_dentry(sbi, new_dentry); + if (IS_ERR(new_path)) { + err = PTR_ERR(new_path); + goto err_put_old_path; + } + + if (d_inode(old_dentry)->i_mode & S_IFDIR) + shfl_flags = 0; + + err = vboxsf_rename(sbi->root, old_path, new_path, shfl_flags); + if (err == 0) { + /* parent directories access/change time changed */ + sf_new_parent_i->force_restat = 1; + sf_old_parent_i->force_restat = 1; + } + + __putname(new_path); +err_put_old_path: + __putname(old_path); + return err; +} + +static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry, + const char *symname) +{ + struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent); + struct vboxsf_sbi *sbi = VBOXSF_SBI(parent->i_sb); + int symname_size = strlen(symname) + 1; + struct shfl_string *path, *ssymname; + struct shfl_fsobjinfo info; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + ssymname = kmalloc(SHFLSTRING_HEADER_SIZE + symname_size, GFP_KERNEL); + if (!ssymname) { + __putname(path); + return -ENOMEM; + } + ssymname->length = symname_size - 1; + ssymname->size = symname_size; + memcpy(ssymname->string.utf8, symname, symname_size); + + err = vboxsf_symlink(sbi->root, path, ssymname, &info); + kfree(ssymname); + __putname(path); + if (err) { + /* -EROFS means symlinks are note support -> -EPERM */ + return (err == -EROFS) ? -EPERM : err; + } + + err = vboxsf_dir_instantiate(parent, dentry, &info); + if (err) + return err; + + /* parent directory access/change time changed */ + sf_parent_i->force_restat = 1; + return 0; +} + +const struct inode_operations vboxsf_dir_iops = { + .lookup = vboxsf_dir_lookup, + .create = vboxsf_dir_mkfile, + .mkdir = vboxsf_dir_mkdir, + .rmdir = vboxsf_dir_unlink, + .unlink = vboxsf_dir_unlink, + .rename = vboxsf_dir_rename, + .symlink = vboxsf_dir_symlink, + .getattr = vboxsf_getattr, + .setattr = vboxsf_setattr, +}; diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c new file mode 100644 index 000000000000..c4ab5996d97a --- /dev/null +++ b/fs/vboxsf/file.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Regular file inode and file ops. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/mm.h> +#include <linux/page-flags.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/sizes.h> +#include "vfsmod.h" + +struct vboxsf_handle { + u64 handle; + u32 root; + u32 access_flags; + struct kref refcount; + struct list_head head; +}; + +static int vboxsf_file_open(struct inode *inode, struct file *file) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct shfl_createparms params = {}; + struct vboxsf_handle *sf_handle; + u32 access_flags = 0; + int err; + + sf_handle = kmalloc(sizeof(*sf_handle), GFP_KERNEL); + if (!sf_handle) + return -ENOMEM; + + /* + * We check the value of params.handle afterwards to find out if + * the call succeeded or failed, as the API does not seem to cleanly + * distinguish error and informational messages. + * + * Furthermore, we must set params.handle to SHFL_HANDLE_NIL to + * make the shared folders host service use our mode parameter. + */ + params.handle = SHFL_HANDLE_NIL; + if (file->f_flags & O_CREAT) { + params.create_flags |= SHFL_CF_ACT_CREATE_IF_NEW; + /* + * We ignore O_EXCL, as the Linux kernel seems to call create + * beforehand itself, so O_EXCL should always fail. + */ + if (file->f_flags & O_TRUNC) + params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS; + else + params.create_flags |= SHFL_CF_ACT_OPEN_IF_EXISTS; + } else { + params.create_flags |= SHFL_CF_ACT_FAIL_IF_NEW; + if (file->f_flags & O_TRUNC) + params.create_flags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS; + } + + switch (file->f_flags & O_ACCMODE) { + case O_RDONLY: + access_flags |= SHFL_CF_ACCESS_READ; + break; + + case O_WRONLY: + access_flags |= SHFL_CF_ACCESS_WRITE; + break; + + case O_RDWR: + access_flags |= SHFL_CF_ACCESS_READWRITE; + break; + + default: + WARN_ON(1); + } + + if (file->f_flags & O_APPEND) + access_flags |= SHFL_CF_ACCESS_APPEND; + + params.create_flags |= access_flags; + params.info.attr.mode = inode->i_mode; + + err = vboxsf_create_at_dentry(file_dentry(file), ¶ms); + if (err == 0 && params.handle == SHFL_HANDLE_NIL) + err = (params.result == SHFL_FILE_EXISTS) ? -EEXIST : -ENOENT; + if (err) { + kfree(sf_handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + + /* init our handle struct and add it to the inode's handles list */ + sf_handle->handle = params.handle; + sf_handle->root = VBOXSF_SBI(inode->i_sb)->root; + sf_handle->access_flags = access_flags; + kref_init(&sf_handle->refcount); + + mutex_lock(&sf_i->handle_list_mutex); + list_add(&sf_handle->head, &sf_i->handle_list); + mutex_unlock(&sf_i->handle_list_mutex); + + file->private_data = sf_handle; + return 0; +} + +static void vboxsf_handle_release(struct kref *refcount) +{ + struct vboxsf_handle *sf_handle = + container_of(refcount, struct vboxsf_handle, refcount); + + vboxsf_close(sf_handle->root, sf_handle->handle); + kfree(sf_handle); +} + +static int vboxsf_file_release(struct inode *inode, struct file *file) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct vboxsf_handle *sf_handle = file->private_data; + + /* + * When a file is closed on our (the guest) side, we want any subsequent + * accesses done on the host side to see all changes done from our side. + */ + filemap_write_and_wait(inode->i_mapping); + + mutex_lock(&sf_i->handle_list_mutex); + list_del(&sf_handle->head); + mutex_unlock(&sf_i->handle_list_mutex); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); + return 0; +} + +/* + * Write back dirty pages now, because there may not be any suitable + * open files later + */ +static void vboxsf_vma_close(struct vm_area_struct *vma) +{ + filemap_write_and_wait(vma->vm_file->f_mapping); +} + +static const struct vm_operations_struct vboxsf_file_vm_ops = { + .close = vboxsf_vma_close, + .fault = filemap_fault, + .map_pages = filemap_map_pages, +}; + +static int vboxsf_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int err; + + err = generic_file_mmap(file, vma); + if (!err) + vma->vm_ops = &vboxsf_file_vm_ops; + + return err; +} + +/* + * Note that since we are accessing files on the host's filesystem, files + * may always be changed underneath us by the host! + * + * The vboxsf API between the guest and the host does not offer any functions + * to deal with this. There is no inode-generation to check for changes, no + * events / callback on changes and no way to lock files. + * + * To avoid returning stale data when a file gets *opened* on our (the guest) + * side, we do a "stat" on the host side, then compare the mtime with the + * last known mtime and invalidate the page-cache if they differ. + * This is done from vboxsf_inode_revalidate(). + * + * When reads are done through the read_iter fop, it is possible to do + * further cache revalidation then, there are 3 options to deal with this: + * + * 1) Rely solely on the revalidation done at open time + * 2) Do another "stat" and compare mtime again. Unfortunately the vboxsf + * host API does not allow stat on handles, so we would need to use + * file->f_path.dentry and the stat will then fail if the file was unlinked + * or renamed (and there is no thing like NFS' silly-rename). So we get: + * 2a) "stat" and compare mtime, on stat failure invalidate the cache + * 2b) "stat" and compare mtime, on stat failure do nothing + * 3) Simply always call invalidate_inode_pages2_range on the range of the read + * + * Currently we are keeping things KISS and using option 1. this allows + * directly using generic_file_read_iter without wrapping it. + * + * This means that only data written on the host side before open() on + * the guest side is guaranteed to be seen by the guest. If necessary + * we may provide other read-cache strategies in the future and make this + * configurable through a mount option. + */ +const struct file_operations vboxsf_reg_fops = { + .llseek = generic_file_llseek, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .mmap = vboxsf_file_mmap, + .open = vboxsf_file_open, + .release = vboxsf_file_release, + .fsync = noop_fsync, + .splice_read = generic_file_splice_read, +}; + +const struct inode_operations vboxsf_reg_iops = { + .getattr = vboxsf_getattr, + .setattr = vboxsf_setattr +}; + +static int vboxsf_readpage(struct file *file, struct page *page) +{ + struct vboxsf_handle *sf_handle = file->private_data; + loff_t off = page_offset(page); + u32 nread = PAGE_SIZE; + u8 *buf; + int err; + + buf = kmap(page); + + err = vboxsf_read(sf_handle->root, sf_handle->handle, off, &nread, buf); + if (err == 0) { + memset(&buf[nread], 0, PAGE_SIZE - nread); + flush_dcache_page(page); + SetPageUptodate(page); + } else { + SetPageError(page); + } + + kunmap(page); + unlock_page(page); + return err; +} + +static struct vboxsf_handle *vboxsf_get_write_handle(struct vboxsf_inode *sf_i) +{ + struct vboxsf_handle *h, *sf_handle = NULL; + + mutex_lock(&sf_i->handle_list_mutex); + list_for_each_entry(h, &sf_i->handle_list, head) { + if (h->access_flags == SHFL_CF_ACCESS_WRITE || + h->access_flags == SHFL_CF_ACCESS_READWRITE) { + kref_get(&h->refcount); + sf_handle = h; + break; + } + } + mutex_unlock(&sf_i->handle_list_mutex); + + return sf_handle; +} + +static int vboxsf_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + struct vboxsf_handle *sf_handle; + loff_t off = page_offset(page); + loff_t size = i_size_read(inode); + u32 nwrite = PAGE_SIZE; + u8 *buf; + int err; + + if (off + PAGE_SIZE > size) + nwrite = size & ~PAGE_MASK; + + sf_handle = vboxsf_get_write_handle(sf_i); + if (!sf_handle) + return -EBADF; + + buf = kmap(page); + err = vboxsf_write(sf_handle->root, sf_handle->handle, + off, &nwrite, buf); + kunmap(page); + + kref_put(&sf_handle->refcount, vboxsf_handle_release); + + if (err == 0) { + ClearPageError(page); + /* mtime changed */ + sf_i->force_restat = 1; + } else { + ClearPageUptodate(page); + } + + unlock_page(page); + return err; +} + +static int vboxsf_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + struct vboxsf_handle *sf_handle = file->private_data; + unsigned int from = pos & ~PAGE_MASK; + u32 nwritten = len; + u8 *buf; + int err; + + /* zero the stale part of the page if we did a short copy */ + if (!PageUptodate(page) && copied < len) + zero_user(page, from + copied, len - copied); + + buf = kmap(page); + err = vboxsf_write(sf_handle->root, sf_handle->handle, + pos, &nwritten, buf + from); + kunmap(page); + + if (err) { + nwritten = 0; + goto out; + } + + /* mtime changed */ + VBOXSF_I(inode)->force_restat = 1; + + if (!PageUptodate(page) && nwritten == PAGE_SIZE) + SetPageUptodate(page); + + pos += nwritten; + if (pos > inode->i_size) + i_size_write(inode, pos); + +out: + unlock_page(page); + put_page(page); + + return nwritten; +} + +/* + * Note simple_write_begin does not read the page from disk on partial writes + * this is ok since vboxsf_write_end only writes the written parts of the + * page and it does not call SetPageUptodate for partial writes. + */ +const struct address_space_operations vboxsf_reg_aops = { + .readpage = vboxsf_readpage, + .writepage = vboxsf_writepage, + .set_page_dirty = __set_page_dirty_nobuffers, + .write_begin = simple_write_begin, + .write_end = vboxsf_write_end, +}; + +static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + struct shfl_string *path; + char *link; + int err; + + if (!dentry) + return ERR_PTR(-ECHILD); + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return ERR_CAST(path); + + link = kzalloc(PATH_MAX, GFP_KERNEL); + if (!link) { + __putname(path); + return ERR_PTR(-ENOMEM); + } + + err = vboxsf_readlink(sbi->root, path, PATH_MAX, link); + __putname(path); + if (err) { + kfree(link); + return ERR_PTR(err); + } + + set_delayed_call(done, kfree_link, link); + return link; +} + +const struct inode_operations vboxsf_lnk_iops = { + .get_link = vboxsf_get_link +}; diff --git a/fs/vboxsf/shfl_hostintf.h b/fs/vboxsf/shfl_hostintf.h new file mode 100644 index 000000000000..aca829062c12 --- /dev/null +++ b/fs/vboxsf/shfl_hostintf.h @@ -0,0 +1,901 @@ +/* SPDX-License-Identifier: MIT */ +/* + * VirtualBox Shared Folders: host interface definition. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#ifndef SHFL_HOSTINTF_H +#define SHFL_HOSTINTF_H + +#include <linux/vbox_vmmdev_types.h> + +/* The max in/out buffer size for a FN_READ or FN_WRITE call */ +#define SHFL_MAX_RW_COUNT (16 * SZ_1M) + +/* + * Structures shared between guest and the service + * can be relocated and use offsets to point to variable + * length parts. + * + * Shared folders protocol works with handles. + * Before doing any action on a file system object, + * one have to obtain the object handle via a SHFL_FN_CREATE + * request. A handle must be closed with SHFL_FN_CLOSE. + */ + +enum { + SHFL_FN_QUERY_MAPPINGS = 1, /* Query mappings changes. */ + SHFL_FN_QUERY_MAP_NAME = 2, /* Query map name. */ + SHFL_FN_CREATE = 3, /* Open/create object. */ + SHFL_FN_CLOSE = 4, /* Close object handle. */ + SHFL_FN_READ = 5, /* Read object content. */ + SHFL_FN_WRITE = 6, /* Write new object content. */ + SHFL_FN_LOCK = 7, /* Lock/unlock a range in the object. */ + SHFL_FN_LIST = 8, /* List object content. */ + SHFL_FN_INFORMATION = 9, /* Query/set object information. */ + /* Note function number 10 is not used! */ + SHFL_FN_REMOVE = 11, /* Remove object */ + SHFL_FN_MAP_FOLDER_OLD = 12, /* Map folder (legacy) */ + SHFL_FN_UNMAP_FOLDER = 13, /* Unmap folder */ + SHFL_FN_RENAME = 14, /* Rename object */ + SHFL_FN_FLUSH = 15, /* Flush file */ + SHFL_FN_SET_UTF8 = 16, /* Select UTF8 filename encoding */ + SHFL_FN_MAP_FOLDER = 17, /* Map folder */ + SHFL_FN_READLINK = 18, /* Read symlink dest (as of VBox 4.0) */ + SHFL_FN_SYMLINK = 19, /* Create symlink (as of VBox 4.0) */ + SHFL_FN_SET_SYMLINKS = 20, /* Ask host to show symlinks (4.0+) */ +}; + +/* Root handles for a mapping are of type u32, Root handles are unique. */ +#define SHFL_ROOT_NIL UINT_MAX + +/* Shared folders handle for an opened object are of type u64. */ +#define SHFL_HANDLE_NIL ULLONG_MAX + +/* Hardcoded maximum length (in chars) of a shared folder name. */ +#define SHFL_MAX_LEN (256) +/* Hardcoded maximum number of shared folder mapping available to the guest. */ +#define SHFL_MAX_MAPPINGS (64) + +/** Shared folder string buffer structure. */ +struct shfl_string { + /** Allocated size of the string member in bytes. */ + u16 size; + + /** Length of string without trailing nul in bytes. */ + u16 length; + + /** UTF-8 or UTF-16 string. Nul terminated. */ + union { + u8 utf8[2]; + u16 utf16[1]; + u16 ucs2[1]; /* misnomer, use utf16. */ + } string; +}; +VMMDEV_ASSERT_SIZE(shfl_string, 6); + +/* The size of shfl_string w/o the string part. */ +#define SHFLSTRING_HEADER_SIZE 4 + +/* Calculate size of the string. */ +static inline u32 shfl_string_buf_size(const struct shfl_string *string) +{ + return string ? SHFLSTRING_HEADER_SIZE + string->size : 0; +} + +/* Set user id on execution (S_ISUID). */ +#define SHFL_UNIX_ISUID 0004000U +/* Set group id on execution (S_ISGID). */ +#define SHFL_UNIX_ISGID 0002000U +/* Sticky bit (S_ISVTX / S_ISTXT). */ +#define SHFL_UNIX_ISTXT 0001000U + +/* Owner readable (S_IRUSR). */ +#define SHFL_UNIX_IRUSR 0000400U +/* Owner writable (S_IWUSR). */ +#define SHFL_UNIX_IWUSR 0000200U +/* Owner executable (S_IXUSR). */ +#define SHFL_UNIX_IXUSR 0000100U + +/* Group readable (S_IRGRP). */ +#define SHFL_UNIX_IRGRP 0000040U +/* Group writable (S_IWGRP). */ +#define SHFL_UNIX_IWGRP 0000020U +/* Group executable (S_IXGRP). */ +#define SHFL_UNIX_IXGRP 0000010U + +/* Other readable (S_IROTH). */ +#define SHFL_UNIX_IROTH 0000004U +/* Other writable (S_IWOTH). */ +#define SHFL_UNIX_IWOTH 0000002U +/* Other executable (S_IXOTH). */ +#define SHFL_UNIX_IXOTH 0000001U + +/* Named pipe (fifo) (S_IFIFO). */ +#define SHFL_TYPE_FIFO 0010000U +/* Character device (S_IFCHR). */ +#define SHFL_TYPE_DEV_CHAR 0020000U +/* Directory (S_IFDIR). */ +#define SHFL_TYPE_DIRECTORY 0040000U +/* Block device (S_IFBLK). */ +#define SHFL_TYPE_DEV_BLOCK 0060000U +/* Regular file (S_IFREG). */ +#define SHFL_TYPE_FILE 0100000U +/* Symbolic link (S_IFLNK). */ +#define SHFL_TYPE_SYMLINK 0120000U +/* Socket (S_IFSOCK). */ +#define SHFL_TYPE_SOCKET 0140000U +/* Whiteout (S_IFWHT). */ +#define SHFL_TYPE_WHITEOUT 0160000U +/* Type mask (S_IFMT). */ +#define SHFL_TYPE_MASK 0170000U + +/* Checks the mode flags indicate a directory (S_ISDIR). */ +#define SHFL_IS_DIRECTORY(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_DIRECTORY) +/* Checks the mode flags indicate a symbolic link (S_ISLNK). */ +#define SHFL_IS_SYMLINK(m) (((m) & SHFL_TYPE_MASK) == SHFL_TYPE_SYMLINK) + +/** The available additional information in a shfl_fsobjattr object. */ +enum shfl_fsobjattr_add { + /** No additional information is available / requested. */ + SHFLFSOBJATTRADD_NOTHING = 1, + /** + * The additional unix attributes (shfl_fsobjattr::u::unix_attr) are + * available / requested. + */ + SHFLFSOBJATTRADD_UNIX, + /** + * The additional extended attribute size (shfl_fsobjattr::u::size) is + * available / requested. + */ + SHFLFSOBJATTRADD_EASIZE, + /** + * The last valid item (inclusive). + * The valid range is SHFLFSOBJATTRADD_NOTHING thru + * SHFLFSOBJATTRADD_LAST. + */ + SHFLFSOBJATTRADD_LAST = SHFLFSOBJATTRADD_EASIZE, + + /** The usual 32-bit hack. */ + SHFLFSOBJATTRADD_32BIT_SIZE_HACK = 0x7fffffff +}; + +/** + * Additional unix Attributes, these are available when + * shfl_fsobjattr.additional == SHFLFSOBJATTRADD_UNIX. + */ +struct shfl_fsobjattr_unix { + /** + * The user owning the filesystem object (st_uid). + * This field is ~0U if not supported. + */ + u32 uid; + + /** + * The group the filesystem object is assigned (st_gid). + * This field is ~0U if not supported. + */ + u32 gid; + + /** + * Number of hard links to this filesystem object (st_nlink). + * This field is 1 if the filesystem doesn't support hardlinking or + * the information isn't available. + */ + u32 hardlinks; + + /** + * The device number of the device which this filesystem object resides + * on (st_dev). This field is 0 if this information is not available. + */ + u32 inode_id_device; + + /** + * The unique identifier (within the filesystem) of this filesystem + * object (st_ino). Together with inode_id_device, this field can be + * used as a OS wide unique id, when both their values are not 0. + * This field is 0 if the information is not available. + */ + u64 inode_id; + + /** + * User flags (st_flags). + * This field is 0 if this information is not available. + */ + u32 flags; + + /** + * The current generation number (st_gen). + * This field is 0 if this information is not available. + */ + u32 generation_id; + + /** + * The device number of a char. or block device type object (st_rdev). + * This field is 0 if the file isn't a char. or block device or when + * the OS doesn't use the major+minor device idenfication scheme. + */ + u32 device; +} __packed; + +/** Extended attribute size. */ +struct shfl_fsobjattr_easize { + /** Size of EAs. */ + s64 cb; +} __packed; + +/** Shared folder filesystem object attributes. */ +struct shfl_fsobjattr { + /** Mode flags (st_mode). SHFL_UNIX_*, SHFL_TYPE_*, and SHFL_DOS_*. */ + u32 mode; + + /** The additional attributes available. */ + enum shfl_fsobjattr_add additional; + + /** + * Additional attributes. + * + * Unless explicitly specified to an API, the API can provide additional + * data as it is provided by the underlying OS. + */ + union { + struct shfl_fsobjattr_unix unix_attr; + struct shfl_fsobjattr_easize size; + } __packed u; +} __packed; +VMMDEV_ASSERT_SIZE(shfl_fsobjattr, 44); + +struct shfl_timespec { + s64 ns_relative_to_unix_epoch; +}; + +/** Filesystem object information structure. */ +struct shfl_fsobjinfo { + /** + * Logical size (st_size). + * For normal files this is the size of the file. + * For symbolic links, this is the length of the path name contained + * in the symbolic link. + * For other objects this fields needs to be specified. + */ + s64 size; + + /** Disk allocation size (st_blocks * DEV_BSIZE). */ + s64 allocated; + + /** Time of last access (st_atime). */ + struct shfl_timespec access_time; + + /** Time of last data modification (st_mtime). */ + struct shfl_timespec modification_time; + + /** + * Time of last status change (st_ctime). + * If not available this is set to modification_time. + */ + struct shfl_timespec change_time; + + /** + * Time of file birth (st_birthtime). + * If not available this is set to change_time. + */ + struct shfl_timespec birth_time; + + /** Attributes. */ + struct shfl_fsobjattr attr; + +} __packed; +VMMDEV_ASSERT_SIZE(shfl_fsobjinfo, 92); + +/** + * result of an open/create request. + * Along with handle value the result code + * identifies what has happened while + * trying to open the object. + */ +enum shfl_create_result { + SHFL_NO_RESULT, + /** Specified path does not exist. */ + SHFL_PATH_NOT_FOUND, + /** Path to file exists, but the last component does not. */ + SHFL_FILE_NOT_FOUND, + /** File already exists and either has been opened or not. */ + SHFL_FILE_EXISTS, + /** New file was created. */ + SHFL_FILE_CREATED, + /** Existing file was replaced or overwritten. */ + SHFL_FILE_REPLACED +}; + +/* No flags. Initialization value. */ +#define SHFL_CF_NONE (0x00000000) + +/* + * Only lookup the object, do not return a handle. When this is set all other + * flags are ignored. + */ +#define SHFL_CF_LOOKUP (0x00000001) + +/* + * Open parent directory of specified object. + * Useful for the corresponding Windows FSD flag + * and for opening paths like \\dir\\*.* to search the 'dir'. + */ +#define SHFL_CF_OPEN_TARGET_DIRECTORY (0x00000002) + +/* Create/open a directory. */ +#define SHFL_CF_DIRECTORY (0x00000004) + +/* + * Open/create action to do if object exists + * and if the object does not exists. + * REPLACE file means atomically DELETE and CREATE. + * OVERWRITE file means truncating the file to 0 and + * setting new size. + * When opening an existing directory REPLACE and OVERWRITE + * actions are considered invalid, and cause returning + * FILE_EXISTS with NIL handle. + */ +#define SHFL_CF_ACT_MASK_IF_EXISTS (0x000000f0) +#define SHFL_CF_ACT_MASK_IF_NEW (0x00000f00) + +/* What to do if object exists. */ +#define SHFL_CF_ACT_OPEN_IF_EXISTS (0x00000000) +#define SHFL_CF_ACT_FAIL_IF_EXISTS (0x00000010) +#define SHFL_CF_ACT_REPLACE_IF_EXISTS (0x00000020) +#define SHFL_CF_ACT_OVERWRITE_IF_EXISTS (0x00000030) + +/* What to do if object does not exist. */ +#define SHFL_CF_ACT_CREATE_IF_NEW (0x00000000) +#define SHFL_CF_ACT_FAIL_IF_NEW (0x00000100) + +/* Read/write requested access for the object. */ +#define SHFL_CF_ACCESS_MASK_RW (0x00003000) + +/* No access requested. */ +#define SHFL_CF_ACCESS_NONE (0x00000000) +/* Read access requested. */ +#define SHFL_CF_ACCESS_READ (0x00001000) +/* Write access requested. */ +#define SHFL_CF_ACCESS_WRITE (0x00002000) +/* Read/Write access requested. */ +#define SHFL_CF_ACCESS_READWRITE (0x00003000) + +/* Requested share access for the object. */ +#define SHFL_CF_ACCESS_MASK_DENY (0x0000c000) + +/* Allow any access. */ +#define SHFL_CF_ACCESS_DENYNONE (0x00000000) +/* Do not allow read. */ +#define SHFL_CF_ACCESS_DENYREAD (0x00004000) +/* Do not allow write. */ +#define SHFL_CF_ACCESS_DENYWRITE (0x00008000) +/* Do not allow access. */ +#define SHFL_CF_ACCESS_DENYALL (0x0000c000) + +/* Requested access to attributes of the object. */ +#define SHFL_CF_ACCESS_MASK_ATTR (0x00030000) + +/* No access requested. */ +#define SHFL_CF_ACCESS_ATTR_NONE (0x00000000) +/* Read access requested. */ +#define SHFL_CF_ACCESS_ATTR_READ (0x00010000) +/* Write access requested. */ +#define SHFL_CF_ACCESS_ATTR_WRITE (0x00020000) +/* Read/Write access requested. */ +#define SHFL_CF_ACCESS_ATTR_READWRITE (0x00030000) + +/* + * The file is opened in append mode. + * Ignored if SHFL_CF_ACCESS_WRITE is not set. + */ +#define SHFL_CF_ACCESS_APPEND (0x00040000) + +/** Create parameters buffer struct for SHFL_FN_CREATE call */ +struct shfl_createparms { + /** Returned handle of opened object. */ + u64 handle; + + /** Returned result of the operation */ + enum shfl_create_result result; + + /** SHFL_CF_* */ + u32 create_flags; + + /** + * Attributes of object to create and + * returned actual attributes of opened/created object. + */ + struct shfl_fsobjinfo info; +} __packed; + +/** Shared Folder directory information */ +struct shfl_dirinfo { + /** Full information about the object. */ + struct shfl_fsobjinfo info; + /** + * The length of the short field (number of UTF16 chars). + * It is 16-bit for reasons of alignment. + */ + u16 short_name_len; + /** + * The short name for 8.3 compatibility. + * Empty string if not available. + */ + u16 short_name[14]; + struct shfl_string name; +}; + +/** Shared folder filesystem properties. */ +struct shfl_fsproperties { + /** + * The maximum size of a filesystem object name. + * This does not include the '\\0'. + */ + u32 max_component_len; + + /** + * True if the filesystem is remote. + * False if the filesystem is local. + */ + bool remote; + + /** + * True if the filesystem is case sensitive. + * False if the filesystem is case insensitive. + */ + bool case_sensitive; + + /** + * True if the filesystem is mounted read only. + * False if the filesystem is mounted read write. + */ + bool read_only; + + /** + * True if the filesystem can encode unicode object names. + * False if it can't. + */ + bool supports_unicode; + + /** + * True if the filesystem is compresses. + * False if it isn't or we don't know. + */ + bool compressed; + + /** + * True if the filesystem compresses of individual files. + * False if it doesn't or we don't know. + */ + bool file_compression; +}; +VMMDEV_ASSERT_SIZE(shfl_fsproperties, 12); + +struct shfl_volinfo { + s64 total_allocation_bytes; + s64 available_allocation_bytes; + u32 bytes_per_allocation_unit; + u32 bytes_per_sector; + u32 serial; + struct shfl_fsproperties properties; +}; + + +/** SHFL_FN_MAP_FOLDER Parameters structure. */ +struct shfl_map_folder { + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: UTF16 + * Path delimiter + */ + struct vmmdev_hgcm_function_parameter delimiter; + + /** + * pointer, in: SHFLROOT (u32) + * Case senstive flag + */ + struct vmmdev_hgcm_function_parameter case_sensitive; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_MAP_FOLDER (4) + + +/** SHFL_FN_UNMAP_FOLDER Parameters structure. */ +struct shfl_unmap_folder { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_UNMAP_FOLDER (1) + + +/** SHFL_FN_CREATE Parameters structure. */ +struct shfl_create { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, in/out: + * Points to struct shfl_createparms buffer. + */ + struct vmmdev_hgcm_function_parameter parms; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_CREATE (3) + + +/** SHFL_FN_CLOSE Parameters structure. */ +struct shfl_close { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to close. + */ + struct vmmdev_hgcm_function_parameter handle; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_CLOSE (2) + + +/** SHFL_FN_READ Parameters structure. */ +struct shfl_read { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to read from. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value64, in: + * Offset to read from. + */ + struct vmmdev_hgcm_function_parameter offset; + + /** + * value64, in/out: + * Bytes to read/How many were read. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, out: + * Buffer to place data to. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_READ (5) + + +/** SHFL_FN_WRITE Parameters structure. */ +struct shfl_write { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to write to. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value64, in: + * Offset to write to. + */ + struct vmmdev_hgcm_function_parameter offset; + + /** + * value64, in/out: + * Bytes to write/How many were written. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in: + * Data to write. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_WRITE (5) + + +/* + * SHFL_FN_LIST + * Listing information includes variable length RTDIRENTRY[EX] structures. + */ + +#define SHFL_LIST_NONE 0 +#define SHFL_LIST_RETURN_ONE 1 + +/** SHFL_FN_LIST Parameters structure. */ +struct shfl_list { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to be listed. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value32, in: + * List flags SHFL_LIST_*. + */ + struct vmmdev_hgcm_function_parameter flags; + + /** + * value32, in/out: + * Bytes to be used for listing information/How many bytes were used. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in/optional + * Points to struct shfl_string buffer that specifies a search path. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: + * Buffer to place listing information to. (struct shfl_dirinfo) + */ + struct vmmdev_hgcm_function_parameter buffer; + + /** + * value32, in/out: + * Indicates a key where the listing must be resumed. + * in: 0 means start from begin of object. + * out: 0 means listing completed. + */ + struct vmmdev_hgcm_function_parameter resume_point; + + /** + * pointer, out: + * Number of files returned + */ + struct vmmdev_hgcm_function_parameter file_count; +}; + +/* Number of parameters */ +#define SHFL_CPARMS_LIST (8) + + +/** SHFL_FN_READLINK Parameters structure. */ +struct shfl_readLink { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * pointer, out: + * Buffer to place data to. + */ + struct vmmdev_hgcm_function_parameter buffer; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_READLINK (3) + + +/* SHFL_FN_INFORMATION */ + +/* Mask of Set/Get bit. */ +#define SHFL_INFO_MODE_MASK (0x1) +/* Get information */ +#define SHFL_INFO_GET (0x0) +/* Set information */ +#define SHFL_INFO_SET (0x1) + +/* Get name of the object. */ +#define SHFL_INFO_NAME (0x2) +/* Set size of object (extend/trucate); only applies to file objects */ +#define SHFL_INFO_SIZE (0x4) +/* Get/Set file object info. */ +#define SHFL_INFO_FILE (0x8) +/* Get volume information. */ +#define SHFL_INFO_VOLUME (0x10) + +/** SHFL_FN_INFORMATION Parameters structure. */ +struct shfl_information { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * value64, in: + * SHFLHANDLE (u64) of object to be listed. + */ + struct vmmdev_hgcm_function_parameter handle; + + /** + * value32, in: + * SHFL_INFO_* + */ + struct vmmdev_hgcm_function_parameter flags; + + /** + * value32, in/out: + * Bytes to be used for information/How many bytes were used. + */ + struct vmmdev_hgcm_function_parameter cb; + + /** + * pointer, in/out: + * Information to be set/get (shfl_fsobjinfo or shfl_string). Do not + * forget to set the shfl_fsobjinfo::attr::additional for a get + * operation as well. + */ + struct vmmdev_hgcm_function_parameter info; + +}; + +/* Number of parameters */ +#define SHFL_CPARMS_INFORMATION (5) + + +/* SHFL_FN_REMOVE */ + +#define SHFL_REMOVE_FILE (0x1) +#define SHFL_REMOVE_DIR (0x2) +#define SHFL_REMOVE_SYMLINK (0x4) + +/** SHFL_FN_REMOVE Parameters structure. */ +struct shfl_remove { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string buffer. + */ + struct vmmdev_hgcm_function_parameter path; + + /** + * value32, in: + * remove flags (file/directory) + */ + struct vmmdev_hgcm_function_parameter flags; + +}; + +#define SHFL_CPARMS_REMOVE (3) + + +/* SHFL_FN_RENAME */ + +#define SHFL_RENAME_FILE (0x1) +#define SHFL_RENAME_DIR (0x2) +#define SHFL_RENAME_REPLACE_IF_EXISTS (0x4) + +/** SHFL_FN_RENAME Parameters structure. */ +struct shfl_rename { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string src. + */ + struct vmmdev_hgcm_function_parameter src; + + /** + * pointer, in: + * Points to struct shfl_string dest. + */ + struct vmmdev_hgcm_function_parameter dest; + + /** + * value32, in: + * rename flags (file/directory) + */ + struct vmmdev_hgcm_function_parameter flags; + +}; + +#define SHFL_CPARMS_RENAME (4) + + +/** SHFL_FN_SYMLINK Parameters structure. */ +struct shfl_symlink { + /** + * pointer, in: SHFLROOT (u32) + * Root handle of the mapping which name is queried. + */ + struct vmmdev_hgcm_function_parameter root; + + /** + * pointer, in: + * Points to struct shfl_string of path for the new symlink. + */ + struct vmmdev_hgcm_function_parameter new_path; + + /** + * pointer, in: + * Points to struct shfl_string of destination for symlink. + */ + struct vmmdev_hgcm_function_parameter old_path; + + /** + * pointer, out: + * Information about created symlink. + */ + struct vmmdev_hgcm_function_parameter info; + +}; + +#define SHFL_CPARMS_SYMLINK (4) + +#endif diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c new file mode 100644 index 000000000000..675e26989376 --- /dev/null +++ b/fs/vboxsf/super.c @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Virtual File System. + * + * Module initialization/finalization + * File system registration/deregistration + * Superblock reading + * Few utility functions + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/idr.h> +#include <linux/fs_parser.h> +#include <linux/magic.h> +#include <linux/module.h> +#include <linux/nls.h> +#include <linux/statfs.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +#define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ + +#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000') +#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377') +#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376') +#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375') + +static int follow_symlinks; +module_param(follow_symlinks, int, 0444); +MODULE_PARM_DESC(follow_symlinks, + "Let host resolve symlinks rather than showing them"); + +static DEFINE_IDA(vboxsf_bdi_ida); +static DEFINE_MUTEX(vboxsf_setup_mutex); +static bool vboxsf_setup_done; +static struct super_operations vboxsf_super_ops; /* forward declaration */ +static struct kmem_cache *vboxsf_inode_cachep; + +static char * const vboxsf_default_nls = CONFIG_NLS_DEFAULT; + +enum { opt_nls, opt_uid, opt_gid, opt_ttl, opt_dmode, opt_fmode, + opt_dmask, opt_fmask }; + +static const struct fs_parameter_spec vboxsf_fs_parameters[] = { + fsparam_string ("nls", opt_nls), + fsparam_u32 ("uid", opt_uid), + fsparam_u32 ("gid", opt_gid), + fsparam_u32 ("ttl", opt_ttl), + fsparam_u32oct ("dmode", opt_dmode), + fsparam_u32oct ("fmode", opt_fmode), + fsparam_u32oct ("dmask", opt_dmask), + fsparam_u32oct ("fmask", opt_fmask), + {} +}; + +static int vboxsf_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + struct fs_parse_result result; + kuid_t uid; + kgid_t gid; + int opt; + + opt = fs_parse(fc, vboxsf_fs_parameters, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case opt_nls: + if (ctx->nls_name || fc->purpose != FS_CONTEXT_FOR_MOUNT) { + vbg_err("vboxsf: Cannot reconfigure nls option\n"); + return -EINVAL; + } + ctx->nls_name = param->string; + param->string = NULL; + break; + case opt_uid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + return -EINVAL; + ctx->o.uid = uid; + break; + case opt_gid: + gid = make_kgid(current_user_ns(), result.uint_32); + if (!gid_valid(gid)) + return -EINVAL; + ctx->o.gid = gid; + break; + case opt_ttl: + ctx->o.ttl = msecs_to_jiffies(result.uint_32); + break; + case opt_dmode: + if (result.uint_32 & ~0777) + return -EINVAL; + ctx->o.dmode = result.uint_32; + ctx->o.dmode_set = true; + break; + case opt_fmode: + if (result.uint_32 & ~0777) + return -EINVAL; + ctx->o.fmode = result.uint_32; + ctx->o.fmode_set = true; + break; + case opt_dmask: + if (result.uint_32 & ~07777) + return -EINVAL; + ctx->o.dmask = result.uint_32; + break; + case opt_fmask: + if (result.uint_32 & ~07777) + return -EINVAL; + ctx->o.fmask = result.uint_32; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + struct shfl_string *folder_name, root_path; + struct vboxsf_sbi *sbi; + struct dentry *droot; + struct inode *iroot; + char *nls_name; + size_t size; + int err; + + if (!fc->source) + return -EINVAL; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + sbi->o = ctx->o; + idr_init(&sbi->ino_idr); + spin_lock_init(&sbi->ino_idr_lock); + sbi->next_generation = 1; + sbi->bdi_id = -1; + + /* Load nls if not utf8 */ + nls_name = ctx->nls_name ? ctx->nls_name : vboxsf_default_nls; + if (strcmp(nls_name, "utf8") != 0) { + if (nls_name == vboxsf_default_nls) + sbi->nls = load_nls_default(); + else + sbi->nls = load_nls(nls_name); + + if (!sbi->nls) { + vbg_err("vboxsf: Count not load '%s' nls\n", nls_name); + err = -EINVAL; + goto fail_free; + } + } + + sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL); + if (sbi->bdi_id < 0) { + err = sbi->bdi_id; + goto fail_free; + } + + err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + if (err) + goto fail_free; + + /* Turn source into a shfl_string and map the folder */ + size = strlen(fc->source) + 1; + folder_name = kmalloc(SHFLSTRING_HEADER_SIZE + size, GFP_KERNEL); + if (!folder_name) { + err = -ENOMEM; + goto fail_free; + } + folder_name->size = size; + folder_name->length = size - 1; + strlcpy(folder_name->string.utf8, fc->source, size); + err = vboxsf_map_folder(folder_name, &sbi->root); + kfree(folder_name); + if (err) { + vbg_err("vboxsf: Host rejected mount of '%s' with error %d\n", + fc->source, err); + goto fail_free; + } + + root_path.length = 1; + root_path.size = 2; + root_path.string.utf8[0] = '/'; + root_path.string.utf8[1] = 0; + err = vboxsf_stat(sbi, &root_path, &sbi->root_info); + if (err) + goto fail_unmap; + + sb->s_magic = VBOXSF_SUPER_MAGIC; + sb->s_blocksize = 1024; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_op = &vboxsf_super_ops; + sb->s_d_op = &vboxsf_dentry_ops; + + iroot = iget_locked(sb, 0); + if (!iroot) { + err = -ENOMEM; + goto fail_unmap; + } + vboxsf_init_inode(sbi, iroot, &sbi->root_info); + unlock_new_inode(iroot); + + droot = d_make_root(iroot); + if (!droot) { + err = -ENOMEM; + goto fail_unmap; + } + + sb->s_root = droot; + sb->s_fs_info = sbi; + return 0; + +fail_unmap: + vboxsf_unmap_folder(sbi->root); +fail_free: + if (sbi->bdi_id >= 0) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); + idr_destroy(&sbi->ino_idr); + kfree(sbi); + return err; +} + +static void vboxsf_inode_init_once(void *data) +{ + struct vboxsf_inode *sf_i = data; + + mutex_init(&sf_i->handle_list_mutex); + inode_init_once(&sf_i->vfs_inode); +} + +static struct inode *vboxsf_alloc_inode(struct super_block *sb) +{ + struct vboxsf_inode *sf_i; + + sf_i = kmem_cache_alloc(vboxsf_inode_cachep, GFP_NOFS); + if (!sf_i) + return NULL; + + sf_i->force_restat = 0; + INIT_LIST_HEAD(&sf_i->handle_list); + + return &sf_i->vfs_inode; +} + +static void vboxsf_free_inode(struct inode *inode) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(inode->i_sb); + unsigned long flags; + + spin_lock_irqsave(&sbi->ino_idr_lock, flags); + idr_remove(&sbi->ino_idr, inode->i_ino); + spin_unlock_irqrestore(&sbi->ino_idr_lock, flags); + kmem_cache_free(vboxsf_inode_cachep, VBOXSF_I(inode)); +} + +static void vboxsf_put_super(struct super_block *sb) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(sb); + + vboxsf_unmap_folder(sbi->root); + if (sbi->bdi_id >= 0) + ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id); + if (sbi->nls) + unload_nls(sbi->nls); + + /* + * vboxsf_free_inode uses the idr, make sure all delayed rcu free + * inodes are flushed. + */ + rcu_barrier(); + idr_destroy(&sbi->ino_idr); + kfree(sbi); +} + +static int vboxsf_statfs(struct dentry *dentry, struct kstatfs *stat) +{ + struct super_block *sb = dentry->d_sb; + struct shfl_volinfo shfl_volinfo; + struct vboxsf_sbi *sbi; + u32 buf_len; + int err; + + sbi = VBOXSF_SBI(sb); + buf_len = sizeof(shfl_volinfo); + err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME, + &buf_len, &shfl_volinfo); + if (err) + return err; + + stat->f_type = VBOXSF_SUPER_MAGIC; + stat->f_bsize = shfl_volinfo.bytes_per_allocation_unit; + + do_div(shfl_volinfo.total_allocation_bytes, + shfl_volinfo.bytes_per_allocation_unit); + stat->f_blocks = shfl_volinfo.total_allocation_bytes; + + do_div(shfl_volinfo.available_allocation_bytes, + shfl_volinfo.bytes_per_allocation_unit); + stat->f_bfree = shfl_volinfo.available_allocation_bytes; + stat->f_bavail = shfl_volinfo.available_allocation_bytes; + + stat->f_files = 1000; + /* + * Don't return 0 here since the guest may then think that it is not + * possible to create any more files. + */ + stat->f_ffree = 1000000; + stat->f_fsid.val[0] = 0; + stat->f_fsid.val[1] = 0; + stat->f_namelen = 255; + return 0; +} + +static struct super_operations vboxsf_super_ops = { + .alloc_inode = vboxsf_alloc_inode, + .free_inode = vboxsf_free_inode, + .put_super = vboxsf_put_super, + .statfs = vboxsf_statfs, +}; + +static int vboxsf_setup(void) +{ + int err; + + mutex_lock(&vboxsf_setup_mutex); + + if (vboxsf_setup_done) + goto success; + + vboxsf_inode_cachep = + kmem_cache_create("vboxsf_inode_cache", + sizeof(struct vboxsf_inode), 0, + (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | + SLAB_ACCOUNT), + vboxsf_inode_init_once); + if (!vboxsf_inode_cachep) { + err = -ENOMEM; + goto fail_nomem; + } + + err = vboxsf_connect(); + if (err) { + vbg_err("vboxsf: err %d connecting to guest PCI-device\n", err); + vbg_err("vboxsf: make sure you are inside a VirtualBox VM\n"); + vbg_err("vboxsf: and check dmesg for vboxguest errors\n"); + goto fail_free_cache; + } + + err = vboxsf_set_utf8(); + if (err) { + vbg_err("vboxsf_setutf8 error %d\n", err); + goto fail_disconnect; + } + + if (!follow_symlinks) { + err = vboxsf_set_symlinks(); + if (err) + vbg_warn("vboxsf: Unable to show symlinks: %d\n", err); + } + + vboxsf_setup_done = true; +success: + mutex_unlock(&vboxsf_setup_mutex); + return 0; + +fail_disconnect: + vboxsf_disconnect(); +fail_free_cache: + kmem_cache_destroy(vboxsf_inode_cachep); +fail_nomem: + mutex_unlock(&vboxsf_setup_mutex); + return err; +} + +static int vboxsf_parse_monolithic(struct fs_context *fc, void *data) +{ + char *options = data; + + if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 && + options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 && + options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 && + options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) { + vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n"); + return -EINVAL; + } + + return generic_parse_monolithic(fc, data); +} + +static int vboxsf_get_tree(struct fs_context *fc) +{ + int err; + + err = vboxsf_setup(); + if (err) + return err; + + return get_tree_nodev(fc, vboxsf_fill_super); +} + +static int vboxsf_reconfigure(struct fs_context *fc) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(fc->root->d_sb); + struct vboxsf_fs_context *ctx = fc->fs_private; + struct inode *iroot = fc->root->d_sb->s_root->d_inode; + + /* Apply changed options to the root inode */ + sbi->o = ctx->o; + vboxsf_init_inode(sbi, iroot, &sbi->root_info); + + return 0; +} + +static void vboxsf_free_fc(struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx = fc->fs_private; + + kfree(ctx->nls_name); + kfree(ctx); +} + +static const struct fs_context_operations vboxsf_context_ops = { + .free = vboxsf_free_fc, + .parse_param = vboxsf_parse_param, + .parse_monolithic = vboxsf_parse_monolithic, + .get_tree = vboxsf_get_tree, + .reconfigure = vboxsf_reconfigure, +}; + +static int vboxsf_init_fs_context(struct fs_context *fc) +{ + struct vboxsf_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + current_uid_gid(&ctx->o.uid, &ctx->o.gid); + + fc->fs_private = ctx; + fc->ops = &vboxsf_context_ops; + return 0; +} + +static struct file_system_type vboxsf_fs_type = { + .owner = THIS_MODULE, + .name = "vboxsf", + .init_fs_context = vboxsf_init_fs_context, + .kill_sb = kill_anon_super +}; + +/* Module initialization/finalization handlers */ +static int __init vboxsf_init(void) +{ + return register_filesystem(&vboxsf_fs_type); +} + +static void __exit vboxsf_fini(void) +{ + unregister_filesystem(&vboxsf_fs_type); + + mutex_lock(&vboxsf_setup_mutex); + if (vboxsf_setup_done) { + vboxsf_disconnect(); + /* + * Make sure all delayed rcu free inodes are flushed + * before we destroy the cache. + */ + rcu_barrier(); + kmem_cache_destroy(vboxsf_inode_cachep); + } + mutex_unlock(&vboxsf_setup_mutex); +} + +module_init(vboxsf_init); +module_exit(vboxsf_fini); + +MODULE_DESCRIPTION("Oracle VM VirtualBox Module for Host File System Access"); +MODULE_AUTHOR("Oracle Corporation"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_FS("vboxsf"); diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c new file mode 100644 index 000000000000..96bd160da48b --- /dev/null +++ b/fs/vboxsf/utils.c @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: MIT +/* + * VirtualBox Guest Shared Folders support: Utility functions. + * Mainly conversion from/to VirtualBox/Linux data structures. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/namei.h> +#include <linux/nls.h> +#include <linux/sizes.h> +#include <linux/vfs.h> +#include "vfsmod.h" + +struct inode *vboxsf_new_inode(struct super_block *sb) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(sb); + struct inode *inode; + unsigned long flags; + int cursor, ret; + u32 gen; + + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&sbi->ino_idr_lock, flags); + cursor = idr_get_cursor(&sbi->ino_idr); + ret = idr_alloc_cyclic(&sbi->ino_idr, inode, 1, 0, GFP_ATOMIC); + if (ret >= 0 && ret < cursor) + sbi->next_generation++; + gen = sbi->next_generation; + spin_unlock_irqrestore(&sbi->ino_idr_lock, flags); + idr_preload_end(); + + if (ret < 0) { + iput(inode); + return ERR_PTR(ret); + } + + inode->i_ino = ret; + inode->i_generation = gen; + return inode; +} + +/* set [inode] attributes based on [info], uid/gid based on [sbi] */ +void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, + const struct shfl_fsobjinfo *info) +{ + const struct shfl_fsobjattr *attr; + s64 allocated; + int mode; + + attr = &info->attr; + +#define mode_set(r) ((attr->mode & (SHFL_UNIX_##r)) ? (S_##r) : 0) + + mode = mode_set(IRUSR); + mode |= mode_set(IWUSR); + mode |= mode_set(IXUSR); + + mode |= mode_set(IRGRP); + mode |= mode_set(IWGRP); + mode |= mode_set(IXGRP); + + mode |= mode_set(IROTH); + mode |= mode_set(IWOTH); + mode |= mode_set(IXOTH); + +#undef mode_set + + /* We use the host-side values for these */ + inode->i_flags |= S_NOATIME | S_NOCMTIME; + inode->i_mapping->a_ops = &vboxsf_reg_aops; + + if (SHFL_IS_DIRECTORY(attr->mode)) { + inode->i_mode = sbi->o.dmode_set ? sbi->o.dmode : mode; + inode->i_mode &= ~sbi->o.dmask; + inode->i_mode |= S_IFDIR; + inode->i_op = &vboxsf_dir_iops; + inode->i_fop = &vboxsf_dir_fops; + /* + * XXX: this probably should be set to the number of entries + * in the directory plus two (. ..) + */ + set_nlink(inode, 1); + } else if (SHFL_IS_SYMLINK(attr->mode)) { + inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode; + inode->i_mode &= ~sbi->o.fmask; + inode->i_mode |= S_IFLNK; + inode->i_op = &vboxsf_lnk_iops; + set_nlink(inode, 1); + } else { + inode->i_mode = sbi->o.fmode_set ? sbi->o.fmode : mode; + inode->i_mode &= ~sbi->o.fmask; + inode->i_mode |= S_IFREG; + inode->i_op = &vboxsf_reg_iops; + inode->i_fop = &vboxsf_reg_fops; + set_nlink(inode, 1); + } + + inode->i_uid = sbi->o.uid; + inode->i_gid = sbi->o.gid; + + inode->i_size = info->size; + inode->i_blkbits = 12; + /* i_blocks always in units of 512 bytes! */ + allocated = info->allocated + 511; + do_div(allocated, 512); + inode->i_blocks = allocated; + + inode->i_atime = ns_to_timespec64( + info->access_time.ns_relative_to_unix_epoch); + inode->i_ctime = ns_to_timespec64( + info->change_time.ns_relative_to_unix_epoch); + inode->i_mtime = ns_to_timespec64( + info->modification_time.ns_relative_to_unix_epoch); +} + +int vboxsf_create_at_dentry(struct dentry *dentry, + struct shfl_createparms *params) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_string *path; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_create(sbi->root, path, params); + __putname(path); + + return err; +} + +int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, + struct shfl_fsobjinfo *info) +{ + struct shfl_createparms params = {}; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW; + + err = vboxsf_create(sbi->root, path, ¶ms); + if (err) + return err; + + if (params.result != SHFL_FILE_EXISTS) + return -ENOENT; + + if (info) + *info = params.info; + + return 0; +} + +int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_string *path; + int err; + + path = vboxsf_path_from_dentry(sbi, dentry); + if (IS_ERR(path)) + return PTR_ERR(path); + + err = vboxsf_stat(sbi, path, info); + __putname(path); + return err; +} + +int vboxsf_inode_revalidate(struct dentry *dentry) +{ + struct vboxsf_sbi *sbi; + struct vboxsf_inode *sf_i; + struct shfl_fsobjinfo info; + struct timespec64 prev_mtime; + struct inode *inode; + int err; + + if (!dentry || !d_really_is_positive(dentry)) + return -EINVAL; + + inode = d_inode(dentry); + prev_mtime = inode->i_mtime; + sf_i = VBOXSF_I(inode); + sbi = VBOXSF_SBI(dentry->d_sb); + if (!sf_i->force_restat) { + if (time_before(jiffies, dentry->d_time + sbi->o.ttl)) + return 0; + } + + err = vboxsf_stat_dentry(dentry, &info); + if (err) + return err; + + dentry->d_time = jiffies; + sf_i->force_restat = 0; + vboxsf_init_inode(sbi, inode, &info); + + /* + * If the file was changed on the host side we need to invalidate the + * page-cache for it. Note this also gets triggered by our own writes, + * this is unavoidable. + */ + if (timespec64_compare(&inode->i_mtime, &prev_mtime) > 0) + invalidate_inode_pages2(inode->i_mapping); + + return 0; +} + +int vboxsf_getattr(const struct path *path, struct kstat *kstat, + u32 request_mask, unsigned int flags) +{ + int err; + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + struct vboxsf_inode *sf_i = VBOXSF_I(inode); + + switch (flags & AT_STATX_SYNC_TYPE) { + case AT_STATX_DONT_SYNC: + err = 0; + break; + case AT_STATX_FORCE_SYNC: + sf_i->force_restat = 1; + /* fall-through */ + default: + err = vboxsf_inode_revalidate(dentry); + } + if (err) + return err; + + generic_fillattr(d_inode(dentry), kstat); + return 0; +} + +int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry)); + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + struct shfl_createparms params = {}; + struct shfl_fsobjinfo info = {}; + u32 buf_len; + int err; + + params.handle = SHFL_HANDLE_NIL; + params.create_flags = SHFL_CF_ACT_OPEN_IF_EXISTS | + SHFL_CF_ACT_FAIL_IF_NEW | + SHFL_CF_ACCESS_ATTR_WRITE; + + /* this is at least required for Posix hosts */ + if (iattr->ia_valid & ATTR_SIZE) + params.create_flags |= SHFL_CF_ACCESS_WRITE; + + err = vboxsf_create_at_dentry(dentry, ¶ms); + if (err || params.result != SHFL_FILE_EXISTS) + return err ? err : -ENOENT; + +#define mode_set(r) ((iattr->ia_mode & (S_##r)) ? SHFL_UNIX_##r : 0) + + /* + * Setting the file size and setting the other attributes has to + * be handled separately. + */ + if (iattr->ia_valid & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME)) { + if (iattr->ia_valid & ATTR_MODE) { + info.attr.mode = mode_set(IRUSR); + info.attr.mode |= mode_set(IWUSR); + info.attr.mode |= mode_set(IXUSR); + info.attr.mode |= mode_set(IRGRP); + info.attr.mode |= mode_set(IWGRP); + info.attr.mode |= mode_set(IXGRP); + info.attr.mode |= mode_set(IROTH); + info.attr.mode |= mode_set(IWOTH); + info.attr.mode |= mode_set(IXOTH); + + if (iattr->ia_mode & S_IFDIR) + info.attr.mode |= SHFL_TYPE_DIRECTORY; + else + info.attr.mode |= SHFL_TYPE_FILE; + } + + if (iattr->ia_valid & ATTR_ATIME) + info.access_time.ns_relative_to_unix_epoch = + timespec64_to_ns(&iattr->ia_atime); + + if (iattr->ia_valid & ATTR_MTIME) + info.modification_time.ns_relative_to_unix_epoch = + timespec64_to_ns(&iattr->ia_mtime); + + /* + * Ignore ctime (inode change time) as it can't be set + * from userland anyway. + */ + + buf_len = sizeof(info); + err = vboxsf_fsinfo(sbi->root, params.handle, + SHFL_INFO_SET | SHFL_INFO_FILE, &buf_len, + &info); + if (err) { + vboxsf_close(sbi->root, params.handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + } + +#undef mode_set + + if (iattr->ia_valid & ATTR_SIZE) { + memset(&info, 0, sizeof(info)); + info.size = iattr->ia_size; + buf_len = sizeof(info); + err = vboxsf_fsinfo(sbi->root, params.handle, + SHFL_INFO_SET | SHFL_INFO_SIZE, &buf_len, + &info); + if (err) { + vboxsf_close(sbi->root, params.handle); + return err; + } + + /* the host may have given us different attr then requested */ + sf_i->force_restat = 1; + } + + vboxsf_close(sbi->root, params.handle); + + /* Update the inode with what the host has actually given us. */ + if (sf_i->force_restat) + vboxsf_inode_revalidate(dentry); + + return 0; +} + +/* + * [dentry] contains string encoded in coding system that corresponds + * to [sbi]->nls, we must convert it to UTF8 here. + * Returns a shfl_string allocated through __getname (must be freed using + * __putname), or an ERR_PTR on error. + */ +struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, + struct dentry *dentry) +{ + struct shfl_string *shfl_path; + int path_len, out_len, nb; + char *buf, *path; + wchar_t uni; + u8 *out; + + buf = __getname(); + if (!buf) + return ERR_PTR(-ENOMEM); + + path = dentry_path_raw(dentry, buf, PATH_MAX); + if (IS_ERR(path)) { + __putname(buf); + return ERR_CAST(path); + } + path_len = strlen(path); + + if (sbi->nls) { + shfl_path = __getname(); + if (!shfl_path) { + __putname(buf); + return ERR_PTR(-ENOMEM); + } + + out = shfl_path->string.utf8; + out_len = PATH_MAX - SHFLSTRING_HEADER_SIZE - 1; + + while (path_len) { + nb = sbi->nls->char2uni(path, path_len, &uni); + if (nb < 0) { + __putname(shfl_path); + __putname(buf); + return ERR_PTR(-EINVAL); + } + path += nb; + path_len -= nb; + + nb = utf32_to_utf8(uni, out, out_len); + if (nb < 0) { + __putname(shfl_path); + __putname(buf); + return ERR_PTR(-ENAMETOOLONG); + } + out += nb; + out_len -= nb; + } + *out = 0; + shfl_path->length = out - shfl_path->string.utf8; + shfl_path->size = shfl_path->length + 1; + __putname(buf); + } else { + if ((SHFLSTRING_HEADER_SIZE + path_len + 1) > PATH_MAX) { + __putname(buf); + return ERR_PTR(-ENAMETOOLONG); + } + /* + * dentry_path stores the name at the end of buf, but the + * shfl_string string we return must be properly aligned. + */ + shfl_path = (struct shfl_string *)buf; + memmove(shfl_path->string.utf8, path, path_len); + shfl_path->string.utf8[path_len] = 0; + shfl_path->length = path_len; + shfl_path->size = path_len + 1; + } + + return shfl_path; +} + +int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, + const unsigned char *utf8_name, size_t utf8_len) +{ + const char *in; + char *out; + size_t out_len; + size_t out_bound_len; + size_t in_bound_len; + + in = utf8_name; + in_bound_len = utf8_len; + + out = name; + out_len = 0; + /* Reserve space for terminating 0 */ + out_bound_len = name_bound_len - 1; + + while (in_bound_len) { + int nb; + unicode_t uni; + + nb = utf8_to_utf32(in, in_bound_len, &uni); + if (nb < 0) + return -EINVAL; + + in += nb; + in_bound_len -= nb; + + nb = sbi->nls->uni2char(uni, out, out_bound_len); + if (nb < 0) + return nb; + + out += nb; + out_bound_len -= nb; + out_len += nb; + } + + *out = 0; + + return 0; +} + +static struct vboxsf_dir_buf *vboxsf_dir_buf_alloc(struct list_head *list) +{ + struct vboxsf_dir_buf *b; + + b = kmalloc(sizeof(*b), GFP_KERNEL); + if (!b) + return NULL; + + b->buf = kmalloc(DIR_BUFFER_SIZE, GFP_KERNEL); + if (!b->buf) { + kfree(b); + return NULL; + } + + b->entries = 0; + b->used = 0; + b->free = DIR_BUFFER_SIZE; + list_add(&b->head, list); + + return b; +} + +static void vboxsf_dir_buf_free(struct vboxsf_dir_buf *b) +{ + list_del(&b->head); + kfree(b->buf); + kfree(b); +} + +struct vboxsf_dir_info *vboxsf_dir_info_alloc(void) +{ + struct vboxsf_dir_info *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return NULL; + + INIT_LIST_HEAD(&p->info_list); + return p; +} + +void vboxsf_dir_info_free(struct vboxsf_dir_info *p) +{ + struct list_head *list, *pos, *tmp; + + list = &p->info_list; + list_for_each_safe(pos, tmp, list) { + struct vboxsf_dir_buf *b; + + b = list_entry(pos, struct vboxsf_dir_buf, head); + vboxsf_dir_buf_free(b); + } + kfree(p); +} + +int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, + u64 handle) +{ + struct vboxsf_dir_buf *b; + u32 entries, size; + int err = 0; + void *buf; + + /* vboxsf_dirinfo returns 1 on end of dir */ + while (err == 0) { + b = vboxsf_dir_buf_alloc(&sf_d->info_list); + if (!b) { + err = -ENOMEM; + break; + } + + buf = b->buf; + size = b->free; + + err = vboxsf_dirinfo(sbi->root, handle, NULL, 0, 0, + &size, buf, &entries); + if (err < 0) + break; + + b->entries += entries; + b->free -= size; + b->used += size; + } + + if (b && b->used == 0) + vboxsf_dir_buf_free(b); + + /* -EILSEQ means the host could not translate a filename, ignore */ + if (err > 0 || err == -EILSEQ) + err = 0; + + return err; +} diff --git a/fs/vboxsf/vboxsf_wrappers.c b/fs/vboxsf/vboxsf_wrappers.c new file mode 100644 index 000000000000..bfc78a097dae --- /dev/null +++ b/fs/vboxsf/vboxsf_wrappers.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: MIT +/* + * Wrapper functions for the shfl host calls. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vbox_err.h> +#include <linux/vbox_utils.h> +#include "vfsmod.h" + +#define SHFL_REQUEST \ + (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV_OTHER | \ + VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN) + +static u32 vboxsf_client_id; + +int vboxsf_connect(void) +{ + struct vbg_dev *gdev; + struct vmmdev_hgcm_service_location loc; + int err, vbox_status; + + loc.type = VMMDEV_HGCM_LOC_LOCALHOST_EXISTING; + strcpy(loc.u.localhost.service_name, "VBoxSharedFolders"); + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return -ENODEV; /* No guest-device */ + + err = vbg_hgcm_connect(gdev, SHFL_REQUEST, &loc, + &vboxsf_client_id, &vbox_status); + vbg_put_gdev(gdev); + + return err ? err : vbg_status_code_to_errno(vbox_status); +} + +void vboxsf_disconnect(void) +{ + struct vbg_dev *gdev; + int vbox_status; + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return; /* guest-device is gone, already disconnected */ + + vbg_hgcm_disconnect(gdev, SHFL_REQUEST, vboxsf_client_id, &vbox_status); + vbg_put_gdev(gdev); +} + +static int vboxsf_call(u32 function, void *parms, u32 parm_count, int *status) +{ + struct vbg_dev *gdev; + int err, vbox_status; + + gdev = vbg_get_gdev(); + if (IS_ERR(gdev)) + return -ESHUTDOWN; /* guest-dev removed underneath us */ + + err = vbg_hgcm_call(gdev, SHFL_REQUEST, vboxsf_client_id, function, + U32_MAX, parms, parm_count, &vbox_status); + vbg_put_gdev(gdev); + + if (err < 0) + return err; + + if (status) + *status = vbox_status; + + return vbg_status_code_to_errno(vbox_status); +} + +int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root) +{ + struct shfl_map_folder parms; + int err, status; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.path.u.pointer.size = shfl_string_buf_size(folder_name); + parms.path.u.pointer.u.linear_addr = (uintptr_t)folder_name; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = 0; + + parms.delimiter.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.delimiter.u.value32 = '/'; + + parms.case_sensitive.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.case_sensitive.u.value32 = 1; + + err = vboxsf_call(SHFL_FN_MAP_FOLDER, &parms, SHFL_CPARMS_MAP_FOLDER, + &status); + if (err == -ENOSYS && status == VERR_NOT_IMPLEMENTED) + vbg_err("%s: Error host is too old\n", __func__); + + *root = parms.root.u.value32; + return err; +} + +int vboxsf_unmap_folder(u32 root) +{ + struct shfl_unmap_folder parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + return vboxsf_call(SHFL_FN_UNMAP_FOLDER, &parms, + SHFL_CPARMS_UNMAP_FOLDER, NULL); +} + +/** + * vboxsf_create - Create a new file or folder + * @root: Root of the shared folder in which to create the file + * @parsed_path: The path of the file or folder relative to the shared folder + * @param: create_parms Parameters for file/folder creation. + * + * Create a new file or folder or open an existing one in a shared folder. + * Note this function always returns 0 / success unless an exceptional condition + * occurs - out of memory, invalid arguments, etc. If the file or folder could + * not be opened or created, create_parms->handle will be set to + * SHFL_HANDLE_NIL on return. In this case the value in create_parms->result + * provides information as to why (e.g. SHFL_FILE_EXISTS), create_parms->result + * is also set on success as additional information. + * + * Returns: + * 0 or negative errno value. + */ +int vboxsf_create(u32 root, struct shfl_string *parsed_path, + struct shfl_createparms *create_parms) +{ + struct shfl_create parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.parms.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.parms.u.pointer.size = sizeof(struct shfl_createparms); + parms.parms.u.pointer.u.linear_addr = (uintptr_t)create_parms; + + return vboxsf_call(SHFL_FN_CREATE, &parms, SHFL_CPARMS_CREATE, NULL); +} + +int vboxsf_close(u32 root, u64 handle) +{ + struct shfl_close parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + + return vboxsf_call(SHFL_FN_CLOSE, &parms, SHFL_CPARMS_CLOSE, NULL); +} + +int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags) +{ + struct shfl_remove parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + + return vboxsf_call(SHFL_FN_REMOVE, &parms, SHFL_CPARMS_REMOVE, NULL); +} + +int vboxsf_rename(u32 root, struct shfl_string *src_path, + struct shfl_string *dest_path, u32 flags) +{ + struct shfl_rename parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.src.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.src.u.pointer.size = shfl_string_buf_size(src_path); + parms.src.u.pointer.u.linear_addr = (uintptr_t)src_path; + + parms.dest.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.dest.u.pointer.size = shfl_string_buf_size(dest_path); + parms.dest.u.pointer.u.linear_addr = (uintptr_t)dest_path; + + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + + return vboxsf_call(SHFL_FN_RENAME, &parms, SHFL_CPARMS_RENAME, NULL); +} + +int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf) +{ + struct shfl_read parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.offset.u.value64 = offset; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_READ, &parms, SHFL_CPARMS_READ, NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf) +{ + struct shfl_write parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.offset.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.offset.u.value64 = offset; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_WRITE, &parms, SHFL_CPARMS_WRITE, NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +/* Returns 0 on success, 1 on end-of-dir, negative errno otherwise */ +int vboxsf_dirinfo(u32 root, u64 handle, + struct shfl_string *parsed_path, u32 flags, u32 index, + u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count) +{ + struct shfl_list parms; + int err, status; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + if (parsed_path) { + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + } else { + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_IN; + parms.path.u.pointer.size = 0; + parms.path.u.pointer.u.linear_addr = 0; + } + + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = *buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + parms.resume_point.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.resume_point.u.value32 = index; + parms.file_count.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.file_count.u.value32 = 0; /* out parameter only */ + + err = vboxsf_call(SHFL_FN_LIST, &parms, SHFL_CPARMS_LIST, &status); + if (err == -ENODATA && status == VERR_NO_MORE_FILES) + err = 1; + + *buf_len = parms.cb.u.value32; + *file_count = parms.file_count.u.value32; + return err; +} + +int vboxsf_fsinfo(u32 root, u64 handle, u32 flags, + u32 *buf_len, void *buf) +{ + struct shfl_information parms; + int err; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.handle.type = VMMDEV_HGCM_PARM_TYPE_64BIT; + parms.handle.u.value64 = handle; + parms.flags.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.flags.u.value32 = flags; + parms.cb.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.cb.u.value32 = *buf_len; + parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL; + parms.info.u.pointer.size = *buf_len; + parms.info.u.pointer.u.linear_addr = (uintptr_t)buf; + + err = vboxsf_call(SHFL_FN_INFORMATION, &parms, SHFL_CPARMS_INFORMATION, + NULL); + + *buf_len = parms.cb.u.value32; + return err; +} + +int vboxsf_readlink(u32 root, struct shfl_string *parsed_path, + u32 buf_len, u8 *buf) +{ + struct shfl_readLink parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.path.u.pointer.size = shfl_string_buf_size(parsed_path); + parms.path.u.pointer.u.linear_addr = (uintptr_t)parsed_path; + + parms.buffer.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.buffer.u.pointer.size = buf_len; + parms.buffer.u.pointer.u.linear_addr = (uintptr_t)buf; + + return vboxsf_call(SHFL_FN_READLINK, &parms, SHFL_CPARMS_READLINK, + NULL); +} + +int vboxsf_symlink(u32 root, struct shfl_string *new_path, + struct shfl_string *old_path, struct shfl_fsobjinfo *buf) +{ + struct shfl_symlink parms; + + parms.root.type = VMMDEV_HGCM_PARM_TYPE_32BIT; + parms.root.u.value32 = root; + + parms.new_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.new_path.u.pointer.size = shfl_string_buf_size(new_path); + parms.new_path.u.pointer.u.linear_addr = (uintptr_t)new_path; + + parms.old_path.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_IN; + parms.old_path.u.pointer.size = shfl_string_buf_size(old_path); + parms.old_path.u.pointer.u.linear_addr = (uintptr_t)old_path; + + parms.info.type = VMMDEV_HGCM_PARM_TYPE_LINADDR_KERNEL_OUT; + parms.info.u.pointer.size = sizeof(struct shfl_fsobjinfo); + parms.info.u.pointer.u.linear_addr = (uintptr_t)buf; + + return vboxsf_call(SHFL_FN_SYMLINK, &parms, SHFL_CPARMS_SYMLINK, NULL); +} + +int vboxsf_set_utf8(void) +{ + return vboxsf_call(SHFL_FN_SET_UTF8, NULL, 0, NULL); +} + +int vboxsf_set_symlinks(void) +{ + return vboxsf_call(SHFL_FN_SET_SYMLINKS, NULL, 0, NULL); +} diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h new file mode 100644 index 000000000000..18f95b00fc33 --- /dev/null +++ b/fs/vboxsf/vfsmod.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: MIT */ +/* + * VirtualBox Guest Shared Folders support: module header. + * + * Copyright (C) 2006-2018 Oracle Corporation + */ + +#ifndef VFSMOD_H +#define VFSMOD_H + +#include <linux/backing-dev.h> +#include <linux/idr.h> +#include "shfl_hostintf.h" + +#define DIR_BUFFER_SIZE SZ_16K + +/* The cast is to prevent assignment of void * to pointers of arbitrary type */ +#define VBOXSF_SBI(sb) ((struct vboxsf_sbi *)(sb)->s_fs_info) +#define VBOXSF_I(i) container_of(i, struct vboxsf_inode, vfs_inode) + +struct vboxsf_options { + unsigned long ttl; + kuid_t uid; + kgid_t gid; + bool dmode_set; + bool fmode_set; + umode_t dmode; + umode_t fmode; + umode_t dmask; + umode_t fmask; +}; + +struct vboxsf_fs_context { + struct vboxsf_options o; + char *nls_name; +}; + +/* per-shared folder information */ +struct vboxsf_sbi { + struct vboxsf_options o; + struct shfl_fsobjinfo root_info; + struct idr ino_idr; + spinlock_t ino_idr_lock; /* This protects ino_idr */ + struct nls_table *nls; + u32 next_generation; + u32 root; + int bdi_id; +}; + +/* per-inode information */ +struct vboxsf_inode { + /* some information was changed, update data on next revalidate */ + int force_restat; + /* list of open handles for this inode + lock protecting it */ + struct list_head handle_list; + /* This mutex protects handle_list accesses */ + struct mutex handle_list_mutex; + /* The VFS inode struct */ + struct inode vfs_inode; +}; + +struct vboxsf_dir_info { + struct list_head info_list; +}; + +struct vboxsf_dir_buf { + size_t entries; + size_t free; + size_t used; + void *buf; + struct list_head head; +}; + +/* globals */ +extern const struct inode_operations vboxsf_dir_iops; +extern const struct inode_operations vboxsf_lnk_iops; +extern const struct inode_operations vboxsf_reg_iops; +extern const struct file_operations vboxsf_dir_fops; +extern const struct file_operations vboxsf_reg_fops; +extern const struct address_space_operations vboxsf_reg_aops; +extern const struct dentry_operations vboxsf_dentry_ops; + +/* from utils.c */ +struct inode *vboxsf_new_inode(struct super_block *sb); +void vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode, + const struct shfl_fsobjinfo *info); +int vboxsf_create_at_dentry(struct dentry *dentry, + struct shfl_createparms *params); +int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path, + struct shfl_fsobjinfo *info); +int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info); +int vboxsf_inode_revalidate(struct dentry *dentry); +int vboxsf_getattr(const struct path *path, struct kstat *kstat, + u32 request_mask, unsigned int query_flags); +int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr); +struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi, + struct dentry *dentry); +int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len, + const unsigned char *utf8_name, size_t utf8_len); +struct vboxsf_dir_info *vboxsf_dir_info_alloc(void); +void vboxsf_dir_info_free(struct vboxsf_dir_info *p); +int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, + u64 handle); + +/* from vboxsf_wrappers.c */ +int vboxsf_connect(void); +void vboxsf_disconnect(void); + +int vboxsf_create(u32 root, struct shfl_string *parsed_path, + struct shfl_createparms *create_parms); + +int vboxsf_close(u32 root, u64 handle); +int vboxsf_remove(u32 root, struct shfl_string *parsed_path, u32 flags); +int vboxsf_rename(u32 root, struct shfl_string *src_path, + struct shfl_string *dest_path, u32 flags); + +int vboxsf_read(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf); +int vboxsf_write(u32 root, u64 handle, u64 offset, u32 *buf_len, u8 *buf); + +int vboxsf_dirinfo(u32 root, u64 handle, + struct shfl_string *parsed_path, u32 flags, u32 index, + u32 *buf_len, struct shfl_dirinfo *buf, u32 *file_count); +int vboxsf_fsinfo(u32 root, u64 handle, u32 flags, + u32 *buf_len, void *buf); + +int vboxsf_map_folder(struct shfl_string *folder_name, u32 *root); +int vboxsf_unmap_folder(u32 root); + +int vboxsf_readlink(u32 root, struct shfl_string *parsed_path, + u32 buf_len, u8 *buf); +int vboxsf_symlink(u32 root, struct shfl_string *new_path, + struct shfl_string *old_path, struct shfl_fsobjinfo *buf); + +int vboxsf_set_utf8(void); +int vboxsf_set_symlinks(void); + +#endif diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig new file mode 100644 index 000000000000..fb87ad372e29 --- /dev/null +++ b/fs/zonefs/Kconfig @@ -0,0 +1,9 @@ +config ZONEFS_FS + tristate "zonefs filesystem support" + depends on BLOCK + depends on BLK_DEV_ZONED + help + zonefs is a simple file system which exposes zones of a zoned block + device (e.g. host-managed or host-aware SMR disk drives) as files. + + If unsure, say N. diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile new file mode 100644 index 000000000000..75a380aa1ae1 --- /dev/null +++ b/fs/zonefs/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ZONEFS_FS) += zonefs.o + +zonefs-y := super.o diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c new file mode 100644 index 000000000000..8bc6ef82d693 --- /dev/null +++ b/fs/zonefs/super.c @@ -0,0 +1,1439 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple file system for zoned block devices exposing zones as files. + * + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + */ +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/magic.h> +#include <linux/iomap.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/statfs.h> +#include <linux/writeback.h> +#include <linux/quotaops.h> +#include <linux/seq_file.h> +#include <linux/parser.h> +#include <linux/uio.h> +#include <linux/mman.h> +#include <linux/sched/mm.h> +#include <linux/crc32.h> + +#include "zonefs.h" + +static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + + /* All I/Os should always be within the file maximum size */ + if (WARN_ON_ONCE(offset + length > zi->i_max_size)) + return -EIO; + + /* + * Sequential zones can only accept direct writes. This is already + * checked when writes are issued, so warn if we see a page writeback + * operation. + */ + if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ && + (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT))) + return -EIO; + + /* + * For conventional zones, all blocks are always mapped. For sequential + * zones, all blocks after always mapped below the inode size (zone + * write pointer) and unwriten beyond. + */ + mutex_lock(&zi->i_truncate_mutex); + isize = i_size_read(inode); + if (offset >= isize) + iomap->type = IOMAP_UNWRITTEN; + else + iomap->type = IOMAP_MAPPED; + if (flags & IOMAP_WRITE) + length = zi->i_max_size - offset; + else + length = min(length, isize - offset); + mutex_unlock(&zi->i_truncate_mutex); + + iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize); + iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset; + iomap->bdev = inode->i_sb->s_bdev; + iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset; + + return 0; +} + +static const struct iomap_ops zonefs_iomap_ops = { + .iomap_begin = zonefs_iomap_begin, +}; + +static int zonefs_readpage(struct file *unused, struct page *page) +{ + return iomap_readpage(page, &zonefs_iomap_ops); +} + +static int zonefs_readpages(struct file *unused, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + return iomap_readpages(mapping, pages, nr_pages, &zonefs_iomap_ops); +} + +/* + * Map blocks for page writeback. This is used only on conventional zone files, + * which implies that the page range can only be within the fixed inode size. + */ +static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc, + struct inode *inode, loff_t offset) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) + return -EIO; + if (WARN_ON_ONCE(offset >= i_size_read(inode))) + return -EIO; + + /* If the mapping is already OK, nothing needs to be done */ + if (offset >= wpc->iomap.offset && + offset < wpc->iomap.offset + wpc->iomap.length) + return 0; + + return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset, + IOMAP_WRITE, &wpc->iomap, NULL); +} + +static const struct iomap_writeback_ops zonefs_writeback_ops = { + .map_blocks = zonefs_map_blocks, +}; + +static int zonefs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct iomap_writepage_ctx wpc = { }; + + return iomap_writepage(page, wbc, &wpc, &zonefs_writeback_ops); +} + +static int zonefs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct iomap_writepage_ctx wpc = { }; + + return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); +} + +static const struct address_space_operations zonefs_file_aops = { + .readpage = zonefs_readpage, + .readpages = zonefs_readpages, + .writepage = zonefs_writepage, + .writepages = zonefs_writepages, + .set_page_dirty = iomap_set_page_dirty, + .releasepage = iomap_releasepage, + .invalidatepage = iomap_invalidatepage, + .migratepage = iomap_migrate_page, + .is_partially_uptodate = iomap_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, + .direct_IO = noop_direct_IO, +}; + +static void zonefs_update_stats(struct inode *inode, loff_t new_isize) +{ + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + loff_t old_isize = i_size_read(inode); + loff_t nr_blocks; + + if (new_isize == old_isize) + return; + + spin_lock(&sbi->s_lock); + + /* + * This may be called for an update after an IO error. + * So beware of the values seen. + */ + if (new_isize < old_isize) { + nr_blocks = (old_isize - new_isize) >> sb->s_blocksize_bits; + if (sbi->s_used_blocks > nr_blocks) + sbi->s_used_blocks -= nr_blocks; + else + sbi->s_used_blocks = 0; + } else { + sbi->s_used_blocks += + (new_isize - old_isize) >> sb->s_blocksize_bits; + if (sbi->s_used_blocks > sbi->s_blocks) + sbi->s_used_blocks = sbi->s_blocks; + } + + spin_unlock(&sbi->s_lock); +} + +/* + * Check a zone condition and adjust its file inode access permissions for + * offline and readonly zones. Return the inode size corresponding to the + * amount of readable data in the zone. + */ +static loff_t zonefs_check_zone_condition(struct inode *inode, + struct blk_zone *zone, bool warn) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + switch (zone->cond) { + case BLK_ZONE_COND_OFFLINE: + /* + * Dead zone: make the inode immutable, disable all accesses + * and set the file size to 0 (zone wp set to zone start). + */ + if (warn) + zonefs_warn(inode->i_sb, "inode %lu: offline zone\n", + inode->i_ino); + inode->i_flags |= S_IMMUTABLE; + inode->i_mode &= ~0777; + zone->wp = zone->start; + return 0; + case BLK_ZONE_COND_READONLY: + /* Do not allow writes in read-only zones */ + if (warn) + zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", + inode->i_ino); + inode->i_flags |= S_IMMUTABLE; + inode->i_mode &= ~0222; + /* fallthrough */ + default: + if (zi->i_ztype == ZONEFS_ZTYPE_CNV) + return zi->i_max_size; + return (zone->wp - zone->start) << SECTOR_SHIFT; + } +} + +struct zonefs_ioerr_data { + struct inode *inode; + bool write; +}; + +static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct zonefs_ioerr_data *err = data; + struct inode *inode = err->inode; + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + loff_t isize, data_size; + + /* + * Check the zone condition: if the zone is not "bad" (offline or + * read-only), read errors are simply signaled to the IO issuer as long + * as there is no inconsistency between the inode size and the amount of + * data writen in the zone (data_size). + */ + data_size = zonefs_check_zone_condition(inode, zone, true); + isize = i_size_read(inode); + if (zone->cond != BLK_ZONE_COND_OFFLINE && + zone->cond != BLK_ZONE_COND_READONLY && + !err->write && isize == data_size) + return 0; + + /* + * At this point, we detected either a bad zone or an inconsistency + * between the inode size and the amount of data written in the zone. + * For the latter case, the cause may be a write IO error or an external + * action on the device. Two error patterns exist: + * 1) The inode size is lower than the amount of data in the zone: + * a write operation partially failed and data was writen at the end + * of the file. This can happen in the case of a large direct IO + * needing several BIOs and/or write requests to be processed. + * 2) The inode size is larger than the amount of data in the zone: + * this can happen with a deferred write error with the use of the + * device side write cache after getting successful write IO + * completions. Other possibilities are (a) an external corruption, + * e.g. an application reset the zone directly, or (b) the device + * has a serious problem (e.g. firmware bug). + * + * In all cases, warn about inode size inconsistency and handle the + * IO error according to the zone condition and to the mount options. + */ + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size) + zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n", + inode->i_ino, isize, data_size); + + /* + * First handle bad zones signaled by hardware. The mount options + * errors=zone-ro and errors=zone-offline result in changing the + * zone condition to read-only and offline respectively, as if the + * condition was signaled by the hardware. + */ + if (zone->cond == BLK_ZONE_COND_OFFLINE || + sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) { + zonefs_warn(sb, "inode %lu: read/write access disabled\n", + inode->i_ino); + if (zone->cond != BLK_ZONE_COND_OFFLINE) { + zone->cond = BLK_ZONE_COND_OFFLINE; + data_size = zonefs_check_zone_condition(inode, zone, + false); + } + } else if (zone->cond == BLK_ZONE_COND_READONLY || + sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) { + zonefs_warn(sb, "inode %lu: write access disabled\n", + inode->i_ino); + if (zone->cond != BLK_ZONE_COND_READONLY) { + zone->cond = BLK_ZONE_COND_READONLY; + data_size = zonefs_check_zone_condition(inode, zone, + false); + } + } + + /* + * If error=remount-ro was specified, any error result in remounting + * the volume as read-only. + */ + if ((sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) && !sb_rdonly(sb)) { + zonefs_warn(sb, "remounting filesystem read-only\n"); + sb->s_flags |= SB_RDONLY; + } + + /* + * Update block usage stats and the inode size to prevent access to + * invalid data. + */ + zonefs_update_stats(inode, data_size); + i_size_write(inode, data_size); + zi->i_wpoffset = data_size; + + return 0; +} + +/* + * When an file IO error occurs, check the file zone to see if there is a change + * in the zone condition (e.g. offline or read-only). For a failed write to a + * sequential zone, the zone write pointer position must also be checked to + * eventually correct the file size and zonefs inode write pointer offset + * (which can be out of sync with the drive due to partial write failures). + */ +static void zonefs_io_error(struct inode *inode, bool write) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + unsigned int noio_flag; + unsigned int nr_zones = + zi->i_max_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + struct zonefs_ioerr_data err = { + .inode = inode, + .write = write, + }; + int ret; + + mutex_lock(&zi->i_truncate_mutex); + + /* + * Memory allocations in blkdev_report_zones() can trigger a memory + * reclaim which may in turn cause a recursion into zonefs as well as + * struct request allocations for the same device. The former case may + * end up in a deadlock on the inode truncate mutex, while the latter + * may prevent IO forward progress. Executing the report zones under + * the GFP_NOIO context avoids both problems. + */ + noio_flag = memalloc_noio_save(); + ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones, + zonefs_io_error_cb, &err); + if (ret != nr_zones) + zonefs_err(sb, "Get inode %lu zone information failed %d\n", + inode->i_ino, ret); + memalloc_noio_restore(noio_flag); + + mutex_unlock(&zi->i_truncate_mutex); +} + +static int zonefs_file_truncate(struct inode *inode, loff_t isize) +{ + struct zonefs_inode_info *zi = ZONEFS_I(inode); + loff_t old_isize; + enum req_opf op; + int ret = 0; + + /* + * Only sequential zone files can be truncated and truncation is allowed + * only down to a 0 size, which is equivalent to a zone reset, and to + * the maximum file size, which is equivalent to a zone finish. + */ + if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) + return -EPERM; + + if (!isize) + op = REQ_OP_ZONE_RESET; + else if (isize == zi->i_max_size) + op = REQ_OP_ZONE_FINISH; + else + return -EPERM; + + inode_dio_wait(inode); + + /* Serialize against page faults */ + down_write(&zi->i_mmap_sem); + + /* Serialize against zonefs_iomap_begin() */ + mutex_lock(&zi->i_truncate_mutex); + + old_isize = i_size_read(inode); + if (isize == old_isize) + goto unlock; + + ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, + zi->i_max_size >> SECTOR_SHIFT, GFP_NOFS); + if (ret) { + zonefs_err(inode->i_sb, + "Zone management operation at %llu failed %d", + zi->i_zsector, ret); + goto unlock; + } + + zonefs_update_stats(inode, isize); + truncate_setsize(inode, isize); + zi->i_wpoffset = isize; + +unlock: + mutex_unlock(&zi->i_truncate_mutex); + up_write(&zi->i_mmap_sem); + + return ret; +} + +static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = d_inode(dentry); + int ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + ret = setattr_prepare(dentry, iattr); + if (ret) + return ret; + + /* + * Since files and directories cannot be created nor deleted, do not + * allow setting any write attributes on the sub-directories grouping + * files by zone type. + */ + if ((iattr->ia_valid & ATTR_MODE) && S_ISDIR(inode->i_mode) && + (iattr->ia_mode & 0222)) + return -EPERM; + + if (((iattr->ia_valid & ATTR_UID) && + !uid_eq(iattr->ia_uid, inode->i_uid)) || + ((iattr->ia_valid & ATTR_GID) && + !gid_eq(iattr->ia_gid, inode->i_gid))) { + ret = dquot_transfer(inode, iattr); + if (ret) + return ret; + } + + if (iattr->ia_valid & ATTR_SIZE) { + ret = zonefs_file_truncate(inode, iattr->ia_size); + if (ret) + return ret; + } + + setattr_copy(inode, iattr); + + return 0; +} + +static const struct inode_operations zonefs_file_inode_operations = { + .setattr = zonefs_inode_setattr, +}; + +static int zonefs_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct inode *inode = file_inode(file); + int ret = 0; + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + /* + * Since only direct writes are allowed in sequential files, page cache + * flush is needed only for conventional zone files. + */ + if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV) + ret = file_write_and_wait_range(file, start, end); + if (!ret) + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + + if (ret) + zonefs_io_error(inode, true); + + return ret; +} + +static vm_fault_t zonefs_filemap_fault(struct vm_fault *vmf) +{ + struct zonefs_inode_info *zi = ZONEFS_I(file_inode(vmf->vma->vm_file)); + vm_fault_t ret; + + down_read(&zi->i_mmap_sem); + ret = filemap_fault(vmf); + up_read(&zi->i_mmap_sem); + + return ret; +} + +static vm_fault_t zonefs_filemap_page_mkwrite(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + vm_fault_t ret; + + if (unlikely(IS_IMMUTABLE(inode))) + return VM_FAULT_SIGBUS; + + /* + * Sanity check: only conventional zone files can have shared + * writeable mappings. + */ + if (WARN_ON_ONCE(zi->i_ztype != ZONEFS_ZTYPE_CNV)) + return VM_FAULT_NOPAGE; + + sb_start_pagefault(inode->i_sb); + file_update_time(vmf->vma->vm_file); + + /* Serialize against truncates */ + down_read(&zi->i_mmap_sem); + ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops); + up_read(&zi->i_mmap_sem); + + sb_end_pagefault(inode->i_sb); + return ret; +} + +static const struct vm_operations_struct zonefs_file_vm_ops = { + .fault = zonefs_filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = zonefs_filemap_page_mkwrite, +}; + +static int zonefs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* + * Conventional zones accept random writes, so their files can support + * shared writable mappings. For sequential zone files, only read + * mappings are possible since there are no guarantees for write + * ordering between msync() and page cache writeback. + */ + if (ZONEFS_I(file_inode(file))->i_ztype == ZONEFS_ZTYPE_SEQ && + (vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + return -EINVAL; + + file_accessed(file); + vma->vm_ops = &zonefs_file_vm_ops; + + return 0; +} + +static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence) +{ + loff_t isize = i_size_read(file_inode(file)); + + /* + * Seeks are limited to below the zone size for conventional zones + * and below the zone write pointer for sequential zones. In both + * cases, this limit is the inode size. + */ + return generic_file_llseek_size(file, offset, whence, isize, isize); +} + +static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (error) { + zonefs_io_error(inode, true); + return error; + } + + if (size && zi->i_ztype != ZONEFS_ZTYPE_CNV) { + /* + * Note that we may be seeing completions out of order, + * but that is not a problem since a write completed + * successfully necessarily means that all preceding writes + * were also successful. So we can safely increase the inode + * size to the write end location. + */ + mutex_lock(&zi->i_truncate_mutex); + if (i_size_read(inode) < iocb->ki_pos + size) { + zonefs_update_stats(inode, iocb->ki_pos + size); + i_size_write(inode, iocb->ki_pos + size); + } + mutex_unlock(&zi->i_truncate_mutex); + } + + return 0; +} + +static const struct iomap_dio_ops zonefs_write_dio_ops = { + .end_io = zonefs_file_write_dio_end_io, +}; + +/* + * Handle direct writes. For sequential zone files, this is the only possible + * write path. For these files, check that the user is issuing writes + * sequentially from the end of the file. This code assumes that the block layer + * delivers write requests to the device in sequential order. This is always the + * case if a block IO scheduler implementing the ELEVATOR_F_ZBD_SEQ_WRITE + * elevator feature is being used (e.g. mq-deadline). The block layer always + * automatically select such an elevator for zoned block devices during the + * device initialization. + */ +static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + size_t count; + ssize_t ret; + + /* + * For async direct IOs to sequential zone files, ignore IOCB_NOWAIT + * as this can cause write reordering (e.g. the first aio gets EAGAIN + * on the inode lock but the second goes through but is now unaligned). + */ + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) + && (iocb->ki_flags & IOCB_NOWAIT)) + iocb->ki_flags &= ~IOCB_NOWAIT; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) + return -EAGAIN; + } else { + inode_lock(inode); + } + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto inode_unlock; + + iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); + count = iov_iter_count(from); + + if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { + ret = -EINVAL; + goto inode_unlock; + } + + /* Enforce sequential writes (append only) in sequential zones */ + mutex_lock(&zi->i_truncate_mutex); + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && iocb->ki_pos != zi->i_wpoffset) { + mutex_unlock(&zi->i_truncate_mutex); + ret = -EINVAL; + goto inode_unlock; + } + mutex_unlock(&zi->i_truncate_mutex); + + ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops, + &zonefs_write_dio_ops, is_sync_kiocb(iocb)); + if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && + (ret > 0 || ret == -EIOCBQUEUED)) { + if (ret > 0) + count = ret; + mutex_lock(&zi->i_truncate_mutex); + zi->i_wpoffset += count; + mutex_unlock(&zi->i_truncate_mutex); + } + +inode_unlock: + inode_unlock(inode); + + return ret; +} + +static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, + struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + ssize_t ret; + + /* + * Direct IO writes are mandatory for sequential zone files so that the + * write IO issuing order is preserved. + */ + if (zi->i_ztype != ZONEFS_ZTYPE_CNV) + return -EIO; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) + return -EAGAIN; + } else { + inode_lock(inode); + } + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto inode_unlock; + + iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); + + ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); + if (ret > 0) + iocb->ki_pos += ret; + else if (ret == -EIO) + zonefs_io_error(inode, true); + +inode_unlock: + inode_unlock(inode); + if (ret > 0) + ret = generic_write_sync(iocb, ret); + + return ret; +} + +static ssize_t zonefs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (sb_rdonly(inode->i_sb)) + return -EROFS; + + /* Write operations beyond the zone size are not allowed */ + if (iocb->ki_pos >= ZONEFS_I(inode)->i_max_size) + return -EFBIG; + + if (iocb->ki_flags & IOCB_DIRECT) + return zonefs_file_dio_write(iocb, from); + + return zonefs_file_buffered_write(iocb, from); +} + +static int zonefs_file_read_dio_end_io(struct kiocb *iocb, ssize_t size, + int error, unsigned int flags) +{ + if (error) { + zonefs_io_error(file_inode(iocb->ki_filp), false); + return error; + } + + return 0; +} + +static const struct iomap_dio_ops zonefs_read_dio_ops = { + .end_io = zonefs_file_read_dio_end_io, +}; + +static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + loff_t isize; + ssize_t ret; + + /* Offline zones cannot be read */ + if (unlikely(IS_IMMUTABLE(inode) && !(inode->i_mode & 0777))) + return -EPERM; + + if (iocb->ki_pos >= zi->i_max_size) + return 0; + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) + return -EAGAIN; + } else { + inode_lock_shared(inode); + } + + /* Limit read operations to written data */ + mutex_lock(&zi->i_truncate_mutex); + isize = i_size_read(inode); + if (iocb->ki_pos >= isize) { + mutex_unlock(&zi->i_truncate_mutex); + ret = 0; + goto inode_unlock; + } + iov_iter_truncate(to, isize - iocb->ki_pos); + mutex_unlock(&zi->i_truncate_mutex); + + if (iocb->ki_flags & IOCB_DIRECT) { + size_t count = iov_iter_count(to); + + if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { + ret = -EINVAL; + goto inode_unlock; + } + file_accessed(iocb->ki_filp); + ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops, + &zonefs_read_dio_ops, is_sync_kiocb(iocb)); + } else { + ret = generic_file_read_iter(iocb, to); + if (ret == -EIO) + zonefs_io_error(inode, false); + } + +inode_unlock: + inode_unlock_shared(inode); + + return ret; +} + +static const struct file_operations zonefs_file_operations = { + .open = generic_file_open, + .fsync = zonefs_file_fsync, + .mmap = zonefs_file_mmap, + .llseek = zonefs_file_llseek, + .read_iter = zonefs_file_read_iter, + .write_iter = zonefs_file_write_iter, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, + .iopoll = iomap_dio_iopoll, +}; + +static struct kmem_cache *zonefs_inode_cachep; + +static struct inode *zonefs_alloc_inode(struct super_block *sb) +{ + struct zonefs_inode_info *zi; + + zi = kmem_cache_alloc(zonefs_inode_cachep, GFP_KERNEL); + if (!zi) + return NULL; + + inode_init_once(&zi->i_vnode); + mutex_init(&zi->i_truncate_mutex); + init_rwsem(&zi->i_mmap_sem); + + return &zi->i_vnode; +} + +static void zonefs_free_inode(struct inode *inode) +{ + kmem_cache_free(zonefs_inode_cachep, ZONEFS_I(inode)); +} + +/* + * File system stat. + */ +static int zonefs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + enum zonefs_ztype t; + u64 fsid; + + buf->f_type = ZONEFS_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_namelen = ZONEFS_NAME_MAX; + + spin_lock(&sbi->s_lock); + + buf->f_blocks = sbi->s_blocks; + if (WARN_ON(sbi->s_used_blocks > sbi->s_blocks)) + buf->f_bfree = 0; + else + buf->f_bfree = buf->f_blocks - sbi->s_used_blocks; + buf->f_bavail = buf->f_bfree; + + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { + if (sbi->s_nr_files[t]) + buf->f_files += sbi->s_nr_files[t] + 1; + } + buf->f_ffree = 0; + + spin_unlock(&sbi->s_lock); + + fsid = le64_to_cpup((void *)sbi->s_uuid.b) ^ + le64_to_cpup((void *)sbi->s_uuid.b + sizeof(u64)); + buf->f_fsid.val[0] = (u32)fsid; + buf->f_fsid.val[1] = (u32)(fsid >> 32); + + return 0; +} + +enum { + Opt_errors_ro, Opt_errors_zro, Opt_errors_zol, Opt_errors_repair, + Opt_err, +}; + +static const match_table_t tokens = { + { Opt_errors_ro, "errors=remount-ro"}, + { Opt_errors_zro, "errors=zone-ro"}, + { Opt_errors_zol, "errors=zone-offline"}, + { Opt_errors_repair, "errors=repair"}, + { Opt_err, NULL} +}; + +static int zonefs_parse_options(struct super_block *sb, char *options) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_errors_ro: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_RO; + break; + case Opt_errors_zro: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZRO; + break; + case Opt_errors_zol: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_ZOL; + break; + case Opt_errors_repair: + sbi->s_mount_opts &= ~ZONEFS_MNTOPT_ERRORS_MASK; + sbi->s_mount_opts |= ZONEFS_MNTOPT_ERRORS_REPAIR; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static int zonefs_show_options(struct seq_file *seq, struct dentry *root) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(root->d_sb); + + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO) + seq_puts(seq, ",errors=remount-ro"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) + seq_puts(seq, ",errors=zone-ro"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZOL) + seq_puts(seq, ",errors=zone-offline"); + if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_REPAIR) + seq_puts(seq, ",errors=repair"); + + return 0; +} + +static int zonefs_remount(struct super_block *sb, int *flags, char *data) +{ + sync_filesystem(sb); + + return zonefs_parse_options(sb, data); +} + +static const struct super_operations zonefs_sops = { + .alloc_inode = zonefs_alloc_inode, + .free_inode = zonefs_free_inode, + .statfs = zonefs_statfs, + .remount_fs = zonefs_remount, + .show_options = zonefs_show_options, +}; + +static const struct inode_operations zonefs_dir_inode_operations = { + .lookup = simple_lookup, + .setattr = zonefs_inode_setattr, +}; + +static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, + enum zonefs_ztype type) +{ + struct super_block *sb = parent->i_sb; + + inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1; + inode_init_owner(inode, parent, S_IFDIR | 0555); + inode->i_op = &zonefs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + set_nlink(inode, 2); + inc_nlink(parent); +} + +static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + enum zonefs_ztype type) +{ + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; + inode->i_mode = S_IFREG | sbi->s_perm; + + zi->i_ztype = type; + zi->i_zsector = zone->start; + zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, + zone->len << SECTOR_SHIFT); + zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true); + + inode->i_uid = sbi->s_uid; + inode->i_gid = sbi->s_gid; + inode->i_size = zi->i_wpoffset; + inode->i_blocks = zone->len; + + inode->i_op = &zonefs_file_inode_operations; + inode->i_fop = &zonefs_file_operations; + inode->i_mapping->a_ops = &zonefs_file_aops; + + sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); + sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; + sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; +} + +static struct dentry *zonefs_create_inode(struct dentry *parent, + const char *name, struct blk_zone *zone, + enum zonefs_ztype type) +{ + struct inode *dir = d_inode(parent); + struct dentry *dentry; + struct inode *inode; + + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + inode = new_inode(parent->d_sb); + if (!inode) + goto dput; + + inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; + if (zone) + zonefs_init_file_inode(inode, zone, type); + else + zonefs_init_dir_inode(dir, inode, type); + d_add(dentry, inode); + dir->i_size++; + + return dentry; + +dput: + dput(dentry); + + return NULL; +} + +struct zonefs_zone_data { + struct super_block *sb; + unsigned int nr_zones[ZONEFS_ZTYPE_MAX]; + struct blk_zone *zones; +}; + +/* + * Create a zone group and populate it with zone files. + */ +static int zonefs_create_zgroup(struct zonefs_zone_data *zd, + enum zonefs_ztype type) +{ + struct super_block *sb = zd->sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct blk_zone *zone, *next, *end; + const char *zgroup_name; + char *file_name; + struct dentry *dir; + unsigned int n = 0; + int ret = -ENOMEM; + + /* If the group is empty, there is nothing to do */ + if (!zd->nr_zones[type]) + return 0; + + file_name = kmalloc(ZONEFS_NAME_MAX, GFP_KERNEL); + if (!file_name) + return -ENOMEM; + + if (type == ZONEFS_ZTYPE_CNV) + zgroup_name = "cnv"; + else + zgroup_name = "seq"; + + dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); + if (!dir) + goto free; + + /* + * The first zone contains the super block: skip it. + */ + end = zd->zones + blkdev_nr_zones(sb->s_bdev->bd_disk); + for (zone = &zd->zones[1]; zone < end; zone = next) { + + next = zone + 1; + if (zonefs_zone_type(zone) != type) + continue; + + /* + * For conventional zones, contiguous zones can be aggregated + * together to form larger files. Note that this overwrites the + * length of the first zone of the set of contiguous zones + * aggregated together. If one offline or read-only zone is + * found, assume that all zones aggregated have the same + * condition. + */ + if (type == ZONEFS_ZTYPE_CNV && + (sbi->s_features & ZONEFS_F_AGGRCNV)) { + for (; next < end; next++) { + if (zonefs_zone_type(next) != type) + break; + zone->len += next->len; + if (next->cond == BLK_ZONE_COND_READONLY && + zone->cond != BLK_ZONE_COND_OFFLINE) + zone->cond = BLK_ZONE_COND_READONLY; + else if (next->cond == BLK_ZONE_COND_OFFLINE) + zone->cond = BLK_ZONE_COND_OFFLINE; + } + } + + /* + * Use the file number within its group as file name. + */ + snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); + if (!zonefs_create_inode(dir, file_name, zone, type)) + goto free; + + n++; + } + + zonefs_info(sb, "Zone group \"%s\" has %u file%s\n", + zgroup_name, n, n > 1 ? "s" : ""); + + sbi->s_nr_files[type] = n; + ret = 0; + +free: + kfree(file_name); + + return ret; +} + +static int zonefs_get_zone_info_cb(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct zonefs_zone_data *zd = data; + + /* + * Count the number of usable zones: the first zone at index 0 contains + * the super block and is ignored. + */ + switch (zone->type) { + case BLK_ZONE_TYPE_CONVENTIONAL: + zone->wp = zone->start + zone->len; + if (idx) + zd->nr_zones[ZONEFS_ZTYPE_CNV]++; + break; + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: + if (idx) + zd->nr_zones[ZONEFS_ZTYPE_SEQ]++; + break; + default: + zonefs_err(zd->sb, "Unsupported zone type 0x%x\n", + zone->type); + return -EIO; + } + + memcpy(&zd->zones[idx], zone, sizeof(struct blk_zone)); + + return 0; +} + +static int zonefs_get_zone_info(struct zonefs_zone_data *zd) +{ + struct block_device *bdev = zd->sb->s_bdev; + int ret; + + zd->zones = kvcalloc(blkdev_nr_zones(bdev->bd_disk), + sizeof(struct blk_zone), GFP_KERNEL); + if (!zd->zones) + return -ENOMEM; + + /* Get zones information from the device */ + ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, + zonefs_get_zone_info_cb, zd); + if (ret < 0) { + zonefs_err(zd->sb, "Zone report failed %d\n", ret); + return ret; + } + + if (ret != blkdev_nr_zones(bdev->bd_disk)) { + zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", + ret, blkdev_nr_zones(bdev->bd_disk)); + return -EIO; + } + + return 0; +} + +static inline void zonefs_cleanup_zone_info(struct zonefs_zone_data *zd) +{ + kvfree(zd->zones); +} + +/* + * Read super block information from the device. + */ +static int zonefs_read_super(struct super_block *sb) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + struct zonefs_super *super; + u32 crc, stored_crc; + struct page *page; + struct bio_vec bio_vec; + struct bio bio; + int ret; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + bio_init(&bio, &bio_vec, 1); + bio.bi_iter.bi_sector = 0; + bio.bi_opf = REQ_OP_READ; + bio_set_dev(&bio, sb->s_bdev); + bio_add_page(&bio, page, PAGE_SIZE, 0); + + ret = submit_bio_wait(&bio); + if (ret) + goto free_page; + + super = kmap(page); + + ret = -EINVAL; + if (le32_to_cpu(super->s_magic) != ZONEFS_MAGIC) + goto unmap; + + stored_crc = le32_to_cpu(super->s_crc); + super->s_crc = 0; + crc = crc32(~0U, (unsigned char *)super, sizeof(struct zonefs_super)); + if (crc != stored_crc) { + zonefs_err(sb, "Invalid checksum (Expected 0x%08x, got 0x%08x)", + crc, stored_crc); + goto unmap; + } + + sbi->s_features = le64_to_cpu(super->s_features); + if (sbi->s_features & ~ZONEFS_F_DEFINED_FEATURES) { + zonefs_err(sb, "Unknown features set 0x%llx\n", + sbi->s_features); + goto unmap; + } + + if (sbi->s_features & ZONEFS_F_UID) { + sbi->s_uid = make_kuid(current_user_ns(), + le32_to_cpu(super->s_uid)); + if (!uid_valid(sbi->s_uid)) { + zonefs_err(sb, "Invalid UID feature\n"); + goto unmap; + } + } + + if (sbi->s_features & ZONEFS_F_GID) { + sbi->s_gid = make_kgid(current_user_ns(), + le32_to_cpu(super->s_gid)); + if (!gid_valid(sbi->s_gid)) { + zonefs_err(sb, "Invalid GID feature\n"); + goto unmap; + } + } + + if (sbi->s_features & ZONEFS_F_PERM) + sbi->s_perm = le32_to_cpu(super->s_perm); + + if (memchr_inv(super->s_reserved, 0, sizeof(super->s_reserved))) { + zonefs_err(sb, "Reserved area is being used\n"); + goto unmap; + } + + uuid_copy(&sbi->s_uuid, (uuid_t *)super->s_uuid); + ret = 0; + +unmap: + kunmap(page); +free_page: + __free_page(page); + + return ret; +} + +/* + * Check that the device is zoned. If it is, get the list of zones and create + * sub-directories and files according to the device zone configuration and + * format options. + */ +static int zonefs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct zonefs_zone_data zd; + struct zonefs_sb_info *sbi; + struct inode *inode; + enum zonefs_ztype t; + int ret; + + if (!bdev_is_zoned(sb->s_bdev)) { + zonefs_err(sb, "Not a zoned block device\n"); + return -EINVAL; + } + + /* + * Initialize super block information: the maximum file size is updated + * when the zone files are created so that the format option + * ZONEFS_F_AGGRCNV which increases the maximum file size of a file + * beyond the zone size is taken into account. + */ + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + spin_lock_init(&sbi->s_lock); + sb->s_fs_info = sbi; + sb->s_magic = ZONEFS_MAGIC; + sb->s_maxbytes = 0; + sb->s_op = &zonefs_sops; + sb->s_time_gran = 1; + + /* + * The block size is set to the device physical sector size to ensure + * that write operations on 512e devices (512B logical block and 4KB + * physical block) are always aligned to the device physical blocks, + * as mandated by the ZBC/ZAC specifications. + */ + sb_set_blocksize(sb, bdev_physical_block_size(sb->s_bdev)); + sbi->s_zone_sectors_shift = ilog2(bdev_zone_sectors(sb->s_bdev)); + sbi->s_uid = GLOBAL_ROOT_UID; + sbi->s_gid = GLOBAL_ROOT_GID; + sbi->s_perm = 0640; + sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; + + ret = zonefs_read_super(sb); + if (ret) + return ret; + + ret = zonefs_parse_options(sb, data); + if (ret) + return ret; + + memset(&zd, 0, sizeof(struct zonefs_zone_data)); + zd.sb = sb; + ret = zonefs_get_zone_info(&zd); + if (ret) + goto cleanup; + + zonefs_info(sb, "Mounting %u zones", + blkdev_nr_zones(sb->s_bdev->bd_disk)); + + /* Create root directory inode */ + ret = -ENOMEM; + inode = new_inode(sb); + if (!inode) + goto cleanup; + + inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk); + inode->i_mode = S_IFDIR | 0555; + inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode); + inode->i_op = &zonefs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + set_nlink(inode, 2); + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + goto cleanup; + + /* Create and populate files in zone groups directories */ + for (t = 0; t < ZONEFS_ZTYPE_MAX; t++) { + ret = zonefs_create_zgroup(&zd, t); + if (ret) + break; + } + +cleanup: + zonefs_cleanup_zone_info(&zd); + + return ret; +} + +static struct dentry *zonefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, zonefs_fill_super); +} + +static void zonefs_kill_super(struct super_block *sb) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + + if (sb->s_root) + d_genocide(sb->s_root); + kill_block_super(sb); + kfree(sbi); +} + +/* + * File system definition and registration. + */ +static struct file_system_type zonefs_type = { + .owner = THIS_MODULE, + .name = "zonefs", + .mount = zonefs_mount, + .kill_sb = zonefs_kill_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init zonefs_init_inodecache(void) +{ + zonefs_inode_cachep = kmem_cache_create("zonefs_inode_cache", + sizeof(struct zonefs_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), + NULL); + if (zonefs_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void zonefs_destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy the inode cache. + */ + rcu_barrier(); + kmem_cache_destroy(zonefs_inode_cachep); +} + +static int __init zonefs_init(void) +{ + int ret; + + BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE); + + ret = zonefs_init_inodecache(); + if (ret) + return ret; + + ret = register_filesystem(&zonefs_type); + if (ret) { + zonefs_destroy_inodecache(); + return ret; + } + + return 0; +} + +static void __exit zonefs_exit(void) +{ + zonefs_destroy_inodecache(); + unregister_filesystem(&zonefs_type); +} + +MODULE_AUTHOR("Damien Le Moal"); +MODULE_DESCRIPTION("Zone file system for zoned block devices"); +MODULE_LICENSE("GPL"); +module_init(zonefs_init); +module_exit(zonefs_exit); diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h new file mode 100644 index 000000000000..ad17fef7ce91 --- /dev/null +++ b/fs/zonefs/zonefs.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Simple zone file system for zoned block devices. + * + * Copyright (C) 2019 Western Digital Corporation or its affiliates. + */ +#ifndef __ZONEFS_H__ +#define __ZONEFS_H__ + +#include <linux/fs.h> +#include <linux/magic.h> +#include <linux/uuid.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> + +/* + * Maximum length of file names: this only needs to be large enough to fit + * the zone group directory names and a decimal zone number for file names. + * 16 characters is plenty. + */ +#define ZONEFS_NAME_MAX 16 + +/* + * Zone types: ZONEFS_ZTYPE_SEQ is used for all sequential zone types + * defined in linux/blkzoned.h, that is, BLK_ZONE_TYPE_SEQWRITE_REQ and + * BLK_ZONE_TYPE_SEQWRITE_PREF. + */ +enum zonefs_ztype { + ZONEFS_ZTYPE_CNV, + ZONEFS_ZTYPE_SEQ, + ZONEFS_ZTYPE_MAX, +}; + +static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) +{ + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return ZONEFS_ZTYPE_CNV; + return ZONEFS_ZTYPE_SEQ; +} + +/* + * In-memory inode data. + */ +struct zonefs_inode_info { + struct inode i_vnode; + + /* File zone type */ + enum zonefs_ztype i_ztype; + + /* File zone start sector (512B unit) */ + sector_t i_zsector; + + /* File zone write pointer position (sequential zones only) */ + loff_t i_wpoffset; + + /* File maximum size */ + loff_t i_max_size; + + /* + * To serialise fully against both syscall and mmap based IO and + * sequential file truncation, two locks are used. For serializing + * zonefs_seq_file_truncate() against zonefs_iomap_begin(), that is, + * file truncate operations against block mapping, i_truncate_mutex is + * used. i_truncate_mutex also protects against concurrent accesses + * and changes to the inode private data, and in particular changes to + * a sequential file size on completion of direct IO writes. + * Serialization of mmap read IOs with truncate and syscall IO + * operations is done with i_mmap_sem in addition to i_truncate_mutex. + * Only zonefs_seq_file_truncate() takes both lock (i_mmap_sem first, + * i_truncate_mutex second). + */ + struct mutex i_truncate_mutex; + struct rw_semaphore i_mmap_sem; +}; + +static inline struct zonefs_inode_info *ZONEFS_I(struct inode *inode) +{ + return container_of(inode, struct zonefs_inode_info, i_vnode); +} + +/* + * On-disk super block (block 0). + */ +#define ZONEFS_LABEL_LEN 64 +#define ZONEFS_UUID_SIZE 16 +#define ZONEFS_SUPER_SIZE 4096 + +struct zonefs_super { + + /* Magic number */ + __le32 s_magic; + + /* Checksum */ + __le32 s_crc; + + /* Volume label */ + char s_label[ZONEFS_LABEL_LEN]; + + /* 128-bit uuid */ + __u8 s_uuid[ZONEFS_UUID_SIZE]; + + /* Features */ + __le64 s_features; + + /* UID/GID to use for files */ + __le32 s_uid; + __le32 s_gid; + + /* File permissions */ + __le32 s_perm; + + /* Padding to ZONEFS_SUPER_SIZE bytes */ + __u8 s_reserved[3988]; + +} __packed; + +/* + * Feature flags: specified in the s_features field of the on-disk super + * block struct zonefs_super and in-memory in the s_feartures field of + * struct zonefs_sb_info. + */ +enum zonefs_features { + /* + * Aggregate contiguous conventional zones into a single file. + */ + ZONEFS_F_AGGRCNV = 1ULL << 0, + /* + * Use super block specified UID for files instead of default 0. + */ + ZONEFS_F_UID = 1ULL << 1, + /* + * Use super block specified GID for files instead of default 0. + */ + ZONEFS_F_GID = 1ULL << 2, + /* + * Use super block specified file permissions instead of default 640. + */ + ZONEFS_F_PERM = 1ULL << 3, +}; + +#define ZONEFS_F_DEFINED_FEATURES \ + (ZONEFS_F_AGGRCNV | ZONEFS_F_UID | ZONEFS_F_GID | ZONEFS_F_PERM) + +/* + * Mount options for zone write pointer error handling. + */ +#define ZONEFS_MNTOPT_ERRORS_RO (1 << 0) /* Make zone file readonly */ +#define ZONEFS_MNTOPT_ERRORS_ZRO (1 << 1) /* Make zone file offline */ +#define ZONEFS_MNTOPT_ERRORS_ZOL (1 << 2) /* Make zone file offline */ +#define ZONEFS_MNTOPT_ERRORS_REPAIR (1 << 3) /* Remount read-only */ +#define ZONEFS_MNTOPT_ERRORS_MASK \ + (ZONEFS_MNTOPT_ERRORS_RO | ZONEFS_MNTOPT_ERRORS_ZRO | \ + ZONEFS_MNTOPT_ERRORS_ZOL | ZONEFS_MNTOPT_ERRORS_REPAIR) + +/* + * In-memory Super block information. + */ +struct zonefs_sb_info { + + unsigned long s_mount_opts; + + spinlock_t s_lock; + + unsigned long long s_features; + kuid_t s_uid; + kgid_t s_gid; + umode_t s_perm; + uuid_t s_uuid; + unsigned int s_zone_sectors_shift; + + unsigned int s_nr_files[ZONEFS_ZTYPE_MAX]; + + loff_t s_blocks; + loff_t s_used_blocks; +}; + +static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +#define zonefs_info(sb, format, args...) \ + pr_info("zonefs (%s): " format, sb->s_id, ## args) +#define zonefs_err(sb, format, args...) \ + pr_err("zonefs (%s) ERROR: " format, sb->s_id, ## args) +#define zonefs_warn(sb, format, args...) \ + pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args) + +#endif |